ENH: _dir_additions returns also the first level of a MultiIndex (#16326)

BibMartin · jreback · commit 2aa4aa947df5 · 2017-12-11T06:23:44.000-05:00
diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
@@ -177,3 +177,13 @@ def time_value_counts_float64(self):
 
     def time_value_counts_strings(self):
         self.s.value_counts()
+
+
+class series_dir(object):
+    goal_time = 0.2
+
+    def setup(self):
+        self.s = Series(index=tm.makeStringIndex(10000))
+
+    def time_dir_strings(self):
+        dir(self.s)
diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
@@ -135,6 +135,7 @@ Other Enhancements
 - Improved wording of ``ValueError`` raised in :func:`read_csv` when the ``usecols`` argument cannot match all columns. (:issue:`17301`)
 - :func:`DataFrame.corrwith` now silently drops non-numeric columns when passed a Series. Before, an exception was raised (:issue:`18570`).
 - :class:`IntervalIndex` now supports time zone aware ``Interval`` objects (:issue:`18537`, :issue:`18538`)
+- :func:`Series` / :func:`DataFrame` tab completion also returns identifiers in the first level of a :func:`MultiIndex`. (:issue:`16326`)
 - :func:`read_excel()` has gained the ``nrows`` parameter (:issue:`16645`)
 - :func:``DataFrame.to_json`` and ``Series.to_json`` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`)
 
@@ -232,9 +233,10 @@ Performance Improvements
 - The overriden ``Timedelta`` properties of days, seconds and microseconds have been removed, leveraging their built-in Python versions instead (:issue:`18242`)
 - ``Series`` construction will reduce the number of copies made of the input data in certain cases (:issue:`17449`)
 - Improved performance of :func:`Series.dt.date` and :func:`DatetimeIndex.date` (:issue:`18058`)
+- Improved performance of :func:`Series.dt.time` and :func:`DatetimeIndex.time` (:issue:`18461`)
 - Improved performance of :func:`IntervalIndex.symmetric_difference()` (:issue:`18475`)
 - Improved performance of ``DatetimeIndex`` and ``Series`` arithmetic operations with Business-Month and Business-Quarter frequencies (:issue:`18489`)
-- Improved performance of :func:`Series.dt.time` and :func:`DatetimeIndex.time`
+- :func:`Series` / :func:`DataFrame` tab completion limits to 100 values, for better performance. (:issue:`18587`)
 
 .. _whatsnew_0220.docs:
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -195,9 +195,12 @@ def __unicode__(self):
         return '%s(%s)' % (self.__class__.__name__, prepr)
 
     def _dir_additions(self):
-        """ add the string-like attributes from the info_axis """
-        additions = set([c for c in self._info_axis
-                         if isinstance(c, string_types) and isidentifier(c)])
+        """ add the string-like attributes from the info_axis.
+        If info_axis is a MultiIndex, it's first level values are used.
+        """
+        additions = set(
+            [c for c in self._info_axis.unique(level=0)[:100]
+             if isinstance(c, string_types) and isidentifier(c)])
         return super(NDFrame, self)._dir_additions().union(additions)
 
     @property
diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py
@@ -128,6 +128,24 @@ def test_column_contains_typeerror(self):
         except TypeError:
             pass
 
+    def test_tab_completion(self):
+        # DataFrame whose columns are identifiers shall have them in __dir__.
+        df = pd.DataFrame([list('abcd'), list('efgh')], columns=list('ABCD'))
+        for key in list('ABCD'):
+            assert key in dir(df)
+        assert isinstance(df.__getitem__('A'), pd.Series)
+
+        # DataFrame whose first-level columns are identifiers shall have
+        # them in __dir__.
+        df = pd.DataFrame(
+            [list('abcd'), list('efgh')],
+            columns=pd.MultiIndex.from_tuples(list(zip('ABCD', 'EFGH'))))
+        for key in list('ABCD'):
+            assert key in dir(df)
+        for key in list('EFGH'):
+            assert key not in dir(df)
+        assert isinstance(df.__getitem__('A'), pd.DataFrame)
+
     def test_not_hashable(self):
         df = self.klass([1])
         pytest.raises(TypeError, hash, df)
diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py
@@ -10,7 +10,7 @@
 from pandas import Index, Series, DataFrame, date_range
 from pandas.core.indexes.datetimes import Timestamp
 
-from pandas.compat import range
+from pandas.compat import range, lzip, isidentifier, string_types
 from pandas import (compat, Categorical, period_range, timedelta_range,
                     DatetimeIndex, PeriodIndex, TimedeltaIndex)
 import pandas.io.formats.printing as printing
@@ -250,6 +250,33 @@ def get_dir(s):
         results = get_dir(s)
         tm.assert_almost_equal(results, list(sorted(set(ok_for_cat))))
 
+    @pytest.mark.parametrize("index", [
+        tm.makeUnicodeIndex(10),
+        tm.makeStringIndex(10),
+        tm.makeCategoricalIndex(10),
+        Index(['foo', 'bar', 'baz'] * 2),
+        tm.makeDateIndex(10),
+        tm.makePeriodIndex(10),
+        tm.makeTimedeltaIndex(10),
+        tm.makeIntIndex(10),
+        tm.makeUIntIndex(10),
+        tm.makeIntIndex(10),
+        tm.makeFloatIndex(10),
+        Index([True, False]),
+        Index(['a{}'.format(i) for i in range(101)]),
+        pd.MultiIndex.from_tuples(lzip('ABCD', 'EFGH')),
+        pd.MultiIndex.from_tuples(lzip([0, 1, 2, 3], 'EFGH')), ])
+    def test_index_tab_completion(self, index):
+        # dir contains string-like values of the Index.
+        s = pd.Series(index=index)
+        dir_s = dir(s)
+        for i, x in enumerate(s.index.unique(level=0)):
+            if i < 100:
+                assert (not isinstance(x, string_types) or
+                        not isidentifier(x) or x in dir_s)
+            else:
+                assert x not in dir_s
+
     def test_not_hashable(self):
         s_empty = Series()
         s = Series([1])