Skip to content

Commit 0958d5f

Browse files
committed
Merge pull request #4639 from jreback/gi_perf
PERF: optimize __getitem__ in DataFrame for columns lookup
2 parents a45206d + 205b5cb commit 0958d5f

File tree

3 files changed

+39
-7
lines changed

3 files changed

+39
-7
lines changed

doc/source/release.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ pandas 0.13
5959
- A Series of dtype ``timedelta64[ns]`` can now be divided by another
6060
``timedelta64[ns]`` object to yield a ``float64`` dtyped Series. This
6161
is frequency conversion.
62+
- Performance improvements with ``__getitem__`` on ``DataFrames``
63+
when the key is a column
6264

6365
**API Changes**
6466

@@ -183,7 +185,7 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
183185
- Refactor ``Series.reindex`` to core/generic.py (:issue:`4604`, :issue:`4618`), allow ``method=`` in reindexing
184186
on a Series to work
185187
- ``Series.copy`` no longer accepts the ``order`` parameter and is now consistent with ``NDFrame`` copy
186-
- Refactor ``rename`` methods to core/generic.py; fixes ``Series.rename`` for (:issue`4605`), and adds ``rename``
188+
- Refactor ``rename`` methods to core/generic.py; fixes ``Series.rename`` for (:issue:`4605`), and adds ``rename``
187189
with the same signature for ``Panel``
188190

189191
**Experimental Features**

pandas/core/frame.py

+19-6
Original file line numberDiff line numberDiff line change
@@ -1810,6 +1810,14 @@ def iget_value(self, i, j):
18101810

18111811
def __getitem__(self, key):
18121812

1813+
# shortcut if we are an actual column
1814+
is_mi_columns = isinstance(self.columns, MultiIndex)
1815+
try:
1816+
if key in self.columns and not is_mi_columns:
1817+
return self._getitem_column(key)
1818+
except:
1819+
pass
1820+
18131821
# see if we can slice the rows
18141822
indexer = _convert_to_index_sliceable(self, key)
18151823
if indexer is not None:
@@ -1820,15 +1828,20 @@ def __getitem__(self, key):
18201828
return self._getitem_array(key)
18211829
elif isinstance(key, DataFrame):
18221830
return self._getitem_frame(key)
1823-
elif isinstance(self.columns, MultiIndex):
1831+
elif is_mi_columns:
18241832
return self._getitem_multilevel(key)
18251833
else:
1826-
# get column
1827-
if self.columns.is_unique:
1828-
return self._get_item_cache(key)
1834+
return self._getitem_column(key)
1835+
1836+
def _getitem_column(self, key):
1837+
""" return the actual column """
1838+
1839+
# get column
1840+
if self.columns.is_unique:
1841+
return self._get_item_cache(key)
18291842

1830-
# duplicate columns
1831-
return self._constructor(self._data.get(key))
1843+
# duplicate columns
1844+
return self._constructor(self._data.get(key))
18321845

18331846
def _getitem_slice(self, key):
18341847
return self._slice(key, axis=0)

vb_suite/frame_methods.py

+17
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@
8484

8585
setup = common_setup + """
8686
df = DataFrame(randn(10000, 1000))
87+
df2 = DataFrame(randn(3000,1),columns=['A'])
88+
df3 = DataFrame(randn(3000,1))
8789
8890
def f():
8991
if hasattr(df, '_item_cache'):
@@ -94,6 +96,15 @@ def f():
9496
def g():
9597
for name, col in df.iteritems():
9698
pass
99+
100+
def h():
101+
for i in xrange(10000):
102+
df2['A']
103+
104+
def j():
105+
for i in xrange(10000):
106+
df3[0]
107+
97108
"""
98109

99110
# as far back as the earliest test currently in the suite
@@ -103,6 +114,12 @@ def g():
103114
frame_iteritems_cached = Benchmark('g()', setup,
104115
start_date=datetime(2010, 6, 1))
105116

117+
frame_getitem_single_column = Benchmark('h()', setup,
118+
start_date=datetime(2010, 6, 1))
119+
120+
frame_getitem_single_column2 = Benchmark('j()', setup,
121+
start_date=datetime(2010, 6, 1))
122+
106123
#----------------------------------------------------------------------
107124
# to_string
108125

0 commit comments

Comments
 (0)