Skip to content

Commit 2aa4aa9

Browse files
BibMartin and jreback
authored and committed
ENH: _dir_additions returns also the first level of a MultiIndex (#16326)
1 parent e909ea0 commit 2aa4aa9

File tree

5 files changed

+65
-5
lines changed

5 files changed

+65
-5
lines changed

asv_bench/benchmarks/series_methods.py

+10
Original file line numberDiff line numberDiff line change
@@ -177,3 +177,13 @@ def time_value_counts_float64(self):
177177

178178
def time_value_counts_strings(self):
179179
self.s.value_counts()
180+
181+
182+
class series_dir(object):
183+
goal_time = 0.2
184+
185+
def setup(self):
186+
self.s = Series(index=tm.makeStringIndex(10000))
187+
188+
def time_dir_strings(self):
189+
dir(self.s)

doc/source/whatsnew/v0.22.0.txt

+3-1
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ Other Enhancements
135135
- Improved wording of ``ValueError`` raised in :func:`read_csv` when the ``usecols`` argument cannot match all columns. (:issue:`17301`)
136136
- :func:`DataFrame.corrwith` now silently drops non-numeric columns when passed a Series. Before, an exception was raised (:issue:`18570`).
137137
- :class:`IntervalIndex` now supports time zone aware ``Interval`` objects (:issue:`18537`, :issue:`18538`)
138+
- :class:`Series` / :class:`DataFrame` tab completion also returns identifiers in the first level of a :class:`MultiIndex`. (:issue:`16326`)
138139
- :func:`read_excel()` has gained the ``nrows`` parameter (:issue:`16645`)
139140
- :func:`DataFrame.to_json` and :func:`Series.to_json` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`)
140141

@@ -232,9 +233,10 @@ Performance Improvements
232233
- The overridden ``Timedelta`` properties of days, seconds and microseconds have been removed, leveraging their built-in Python versions instead (:issue:`18242`)
233234
- ``Series`` construction will reduce the number of copies made of the input data in certain cases (:issue:`17449`)
234235
- Improved performance of :func:`Series.dt.date` and :func:`DatetimeIndex.date` (:issue:`18058`)
236+
- Improved performance of :func:`Series.dt.time` and :func:`DatetimeIndex.time` (:issue:`18461`)
235237
- Improved performance of :func:`IntervalIndex.symmetric_difference()` (:issue:`18475`)
236238
- Improved performance of ``DatetimeIndex`` and ``Series`` arithmetic operations with Business-Month and Business-Quarter frequencies (:issue:`18489`)
237-
- Improved performance of :func:`Series.dt.time` and :func:`DatetimeIndex.time`
239+
- :class:`Series` / :class:`DataFrame` tab completion limits to 100 values, for better performance. (:issue:`18587`)
238240

239241
.. _whatsnew_0220.docs:
240242

pandas/core/generic.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -195,9 +195,12 @@ def __unicode__(self):
195195
return '%s(%s)' % (self.__class__.__name__, prepr)
196196

197197
def _dir_additions(self):
198-
""" add the string-like attributes from the info_axis """
199-
additions = set([c for c in self._info_axis
200-
if isinstance(c, string_types) and isidentifier(c)])
198+
""" add the string-like attributes from the info_axis.
199+
If info_axis is a MultiIndex, its first level values are used.
200+
"""
201+
additions = set(
202+
[c for c in self._info_axis.unique(level=0)[:100]
203+
if isinstance(c, string_types) and isidentifier(c)])
201204
return super(NDFrame, self)._dir_additions().union(additions)
202205

203206
@property

pandas/tests/frame/test_api.py

+18
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,24 @@ def test_column_contains_typeerror(self):
128128
except TypeError:
129129
pass
130130

131+
def test_tab_completion(self):
132+
# DataFrame whose columns are identifiers shall have them in __dir__.
133+
df = pd.DataFrame([list('abcd'), list('efgh')], columns=list('ABCD'))
134+
for key in list('ABCD'):
135+
assert key in dir(df)
136+
assert isinstance(df.__getitem__('A'), pd.Series)
137+
138+
# DataFrame whose first-level columns are identifiers shall have
139+
# them in __dir__.
140+
df = pd.DataFrame(
141+
[list('abcd'), list('efgh')],
142+
columns=pd.MultiIndex.from_tuples(list(zip('ABCD', 'EFGH'))))
143+
for key in list('ABCD'):
144+
assert key in dir(df)
145+
for key in list('EFGH'):
146+
assert key not in dir(df)
147+
assert isinstance(df.__getitem__('A'), pd.DataFrame)
148+
131149
def test_not_hashable(self):
132150
df = self.klass([1])
133151
pytest.raises(TypeError, hash, df)

pandas/tests/series/test_api.py

+28-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from pandas import Index, Series, DataFrame, date_range
1111
from pandas.core.indexes.datetimes import Timestamp
1212

13-
from pandas.compat import range
13+
from pandas.compat import range, lzip, isidentifier, string_types
1414
from pandas import (compat, Categorical, period_range, timedelta_range,
1515
DatetimeIndex, PeriodIndex, TimedeltaIndex)
1616
import pandas.io.formats.printing as printing
@@ -250,6 +250,33 @@ def get_dir(s):
250250
results = get_dir(s)
251251
tm.assert_almost_equal(results, list(sorted(set(ok_for_cat))))
252252

253+
@pytest.mark.parametrize("index", [
254+
tm.makeUnicodeIndex(10),
255+
tm.makeStringIndex(10),
256+
tm.makeCategoricalIndex(10),
257+
Index(['foo', 'bar', 'baz'] * 2),
258+
tm.makeDateIndex(10),
259+
tm.makePeriodIndex(10),
260+
tm.makeTimedeltaIndex(10),
261+
tm.makeIntIndex(10),
262+
tm.makeUIntIndex(10),
263+
tm.makeIntIndex(10),
264+
tm.makeFloatIndex(10),
265+
Index([True, False]),
266+
Index(['a{}'.format(i) for i in range(101)]),
267+
pd.MultiIndex.from_tuples(lzip('ABCD', 'EFGH')),
268+
pd.MultiIndex.from_tuples(lzip([0, 1, 2, 3], 'EFGH')), ])
269+
def test_index_tab_completion(self, index):
270+
# dir contains string-like values of the Index.
271+
s = pd.Series(index=index)
272+
dir_s = dir(s)
273+
for i, x in enumerate(s.index.unique(level=0)):
274+
if i < 100:
275+
assert (not isinstance(x, string_types) or
276+
not isidentifier(x) or x in dir_s)
277+
else:
278+
assert x not in dir_s
279+
253280
def test_not_hashable(self):
254281
s_empty = Series()
255282
s = Series([1])

0 commit comments

Comments
 (0)