Skip to content

Commit be76736

Browse files
committed
ENH: added a cache of DataFrame columns to improve perf with many calls to __getitem__
1 parent 0f59729 commit be76736

File tree

4 files changed, with 38 additions and 8 deletions.

pandas/core/frame.py

+29 -2
Original file line number | Diff line number | Diff line change
@@ -163,6 +163,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
163163
raise PandasError('DataFrame constructor not properly called!')
164164

165165
self._data = mgr
166+
self._series_cache = {}
166167

167168
def _init_dict(self, data, index, columns, dtype=None):
168169
"""
@@ -613,8 +614,15 @@ def _get_dtype_counts(self):
613614
#----------------------------------------------------------------------
614615
# properties for index and columns
615616

617+
def _get_columns(self):
618+
return self._data.axes[0]
619+
620+
def _set_columns(self, value):
621+
self._data.set_axis(0, value)
622+
self._series_cache.clear()
623+
columns = property(fset=_set_columns, fget=_get_columns)
624+
616625
# reference underlying BlockManager
617-
columns = AxisProperty(0)
618626
index = AxisProperty(1)
619627

620628
def as_matrix(self, columns=None):
@@ -653,6 +661,7 @@ def __setstate__(self, state):
653661
else: # pragma: no cover
654662
# old pickling format, for compatibility
655663
self._unpickle_matrix_compat(state)
664+
self._series_cache = {}
656665

657666
def _unpickle_frame_compat(self, state): # pragma: no cover
658667
from pandas.core.common import _unpickle_array
@@ -770,8 +779,14 @@ def _getitem_multilevel(self, key):
770779
return self._getitem_single(key)
771780

772781
def _getitem_single(self, key):
782+
res = self._series_cache.get(key)
783+
if res is not None:
784+
return res
785+
773786
values = self._data.get(key)
774-
return Series(values, index=self.index)
787+
res = Series(values, index=self.index)
788+
self._series_cache[key] = res
789+
return res
775790

776791
def __setitem__(self, key, value):
777792
"""
@@ -834,6 +849,11 @@ def _set_item(self, key, value):
834849
value = np.atleast_2d(value) # is this a hack?
835850
self._data.set(key, value)
836851

852+
try:
853+
del self._series_cache[key]
854+
except KeyError:
855+
pass
856+
837857
def _sanitize_column(self, value):
838858
# Need to make sure new columns (which go into the BlockManager as new
839859
# blocks) are always copied
@@ -864,6 +884,11 @@ def __delitem__(self, key):
864884
"""
865885
self._data.delete(key)
866886

887+
try:
888+
del self._series_cache[key]
889+
except KeyError:
890+
pass
891+
867892
def pop(self, item):
868893
"""
869894
Return column and drop from frame. Raise KeyError if not
@@ -1219,9 +1244,11 @@ def rename(self, index=None, columns=None):
12191244

12201245
def _rename_index_inplace(self, mapper):
12211246
self._data = self._data.rename_axis(mapper, axis=1)
1247+
self._series_cache.clear()
12221248

12231249
def _rename_columns_inplace(self, mapper):
12241250
self._data = self._data.rename_items(mapper)
1251+
self._series_cache.clear()
12251252

12261253
#----------------------------------------------------------------------
12271254
# Arithmetic / combination related

pandas/core/indexing.py

+6 -4
Original file line number | Diff line number | Diff line change
@@ -90,18 +90,20 @@ def __init__(self, frame):
9090
self.frame = frame
9191

9292
def __getitem__(self, key):
93-
frame = self.frame
9493
if isinstance(key, slice):
9594
return self._fancy_getitem_axis(key, axis=0)
9695
elif isinstance(key, tuple):
97-
if len(key) != 2:
98-
raise Exception('only length 2 tuple supported')
99-
return self._fancy_getitem_tuple(*key)
96+
return self._getitem_tuple(key)
10097
elif _is_list_like(key):
10198
return self._fancy_getitem(key, axis=0)
10299
else:
103100
return self._fancy_getitem_axis(key, axis=0)
104101

102+
def _getitem_tuple(self, key):
103+
if len(key) != 2:
104+
raise Exception('only length 2 tuple supported')
105+
return self._fancy_getitem_tuple(*key)
106+
105107
def __setitem__(self, key, value):
106108
# also has the side effect of consolidating in-place
107109
if self.frame._is_mixed_type:

pandas/core/series.py

+1 -2
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from pandas.core.daterange import DateRange
1919
from pandas.core.generic import PandasObject
2020
from pandas.core.index import Index, MultiIndex
21+
from pandas.core.indexing import _SeriesIndexer, _maybe_droplevels
2122
import pandas.core.datetools as datetools
2223
import pandas._tseries as _tseries
2324

@@ -1477,8 +1478,6 @@ def _lastTimeWithValue(self): # pragma: no cover
14771478
_ix = None
14781479
@property
14791480
def ix(self):
1480-
from pandas.core.indexing import _SeriesIndexer
1481-
14821481
if self._ix is None:
14831482
self._ix = _SeriesIndexer(self)
14841483

pandas/tests/test_internals.py

+2
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
# pylint: disable=W0102
2+
13
import unittest
24

35
import numpy as np

0 commit comments

Comments
 (0)