pandas-dev · jreback · Dec 20, 2016
diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
@@ -68,6 +68,8 @@ class Iteration(object):
     def setup(self):
         self.df = DataFrame(randn(10000, 1000))
         self.df2 = DataFrame(np.random.randn(50000, 10))
+        self.df3 = pd.DataFrame(np.random.randn(1000,5000),
+                                columns=['C'+str(c) for c in range(5000)])
 
     def f(self):
         if hasattr(self.df, '_item_cache'):
@@ -85,6 +87,11 @@ def time_iteritems(self):
     def time_iteritems_cached(self):
         self.g()
 
+    def time_iteritems_indexing(self):
+        df = self.df3
+        for col in df:
+            df[col]
+
     def time_itertuples(self):
         for row in self.df2.itertuples():
             pass

diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt
@@ -23,6 +23,7 @@ Performance Improvements
 
 - Improved performance of ``.replace()`` (:issue:`12745`)
 - Improved performance of ``PeriodIndex`` (:issue:`14822`)
+- Fixed performance regression in indexing with getitem (:issue:`14930`)
 - Improved performance ``Series`` creation with a datetime index and dictionary data (:issue:`14894`)
 
 .. _whatsnew_0192.enhancements.other:

diff --git a/pandas/index.pyx b/pandas/index.pyx
@@ -82,20 +82,13 @@ cdef class IndexEngine:
 
     cdef:
         bint unique, monotonic_inc, monotonic_dec
-        bint initialized, monotonic_check, unique_check
+        bint need_monotonic_check, need_unique_check
 
     def __init__(self, vgetter, n):
         self.vgetter = vgetter
 
         self.over_size_threshold = n >= _SIZE_CUTOFF
-
-        self.initialized = 0
-        self.monotonic_check = 0
-        self.unique_check = 0
-
-        self.unique = 0
-        self.monotonic_inc = 0
-        self.monotonic_dec = 0
+        self.clear_mapping()
 
     def __contains__(self, object val):
         self._ensure_mapping_populated()
@@ -213,24 +206,28 @@ cdef class IndexEngine:
     property is_unique:
 
         def __get__(self):
-            if not self.initialized:
-                self.initialize()
+            if self.need_unique_check:
+                self._do_unique_check()
 
-            self.unique_check = 1
             return self.unique == 1
 
+    cdef inline _do_unique_check(self):
+
+        # this is de facto the same as populating the mapping
+        self._ensure_mapping_populated()
+
     property is_monotonic_increasing:
 
         def __get__(self):
-            if not self.monotonic_check:
+            if self.need_monotonic_check:
                 self._do_monotonic_check()
 
             return self.monotonic_inc == 1
 
     property is_monotonic_decreasing:
 
         def __get__(self):
-            if not self.monotonic_check:
+            if self.need_monotonic_check:
                 self._do_monotonic_check()
 
             return self.monotonic_dec == 1
@@ -246,13 +243,12 @@ cdef class IndexEngine:
             self.monotonic_dec = 0
             is_unique = 0
 
-        self.monotonic_check = 1
+        self.need_monotonic_check = 0
 
         # we can only be sure of uniqueness if is_unique=1
         if is_unique:
-            self.initialized = 1
             self.unique = 1
-            self.unique_check = 1
+            self.need_unique_check = 0
 
     cdef _get_index_values(self):
         return self.vgetter()
@@ -266,30 +262,32 @@ cdef class IndexEngine:
     cdef _check_type(self, object val):
         hash(val)
 
+    property is_mapping_populated:
+
+        def __get__(self):
+            return self.mapping is not None
+
     cdef inline _ensure_mapping_populated(self):
-        # need to reset if we have previously
-        # set the initialized from monotonic checks
-        if self.unique_check:
-            self.initialized = 0
-        if not self.initialized:
-            self.initialize()
-
-    cdef initialize(self):
-        values = self._get_index_values()
+        # populate the mapping if it is
+        # not already populated; this
+        # also satisfies the need_unique_check
 
-        self.mapping = self._make_hash_table(len(values))
-        self.mapping.map_locations(values)
+        if not self.is_mapping_populated:
 
-        if len(self.mapping) == len(values):
-            self.unique = 1
+            values = self._get_index_values()
+
+            self.mapping = self._make_hash_table(len(values))
+            self.mapping.map_locations(values)
+
+            if len(self.mapping) == len(values):
+                self.unique = 1
 
-        self.initialized = 1
+        self.need_unique_check = 0
 
     def clear_mapping(self):
         self.mapping = None
-        self.initialized = 0
-        self.monotonic_check = 0
-        self.unique_check = 0
+        self.need_monotonic_check = 1
+        self.need_unique_check = 1
 
         self.unique = 0
         self.monotonic_inc = 0