From 6840e113db62c518c7e97aece2497bbb79d9faed Mon Sep 17 00:00:00 2001 From: Martin Journois Date: Thu, 11 May 2017 11:21:04 +0200 Subject: [PATCH 1/6] ENH: _dir_additions returns only the 100 first values of the index' first level --- pandas/core/generic.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ea4a645927d7b..25563df0d8b08 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -195,9 +195,12 @@ def __unicode__(self): return '%s(%s)' % (self.__class__.__name__, prepr) def _dir_additions(self): - """ add the string-like attributes from the info_axis """ - additions = set([c for c in self._info_axis - if isinstance(c, string_types) and isidentifier(c)]) + """ add the string-like attributes from the info_axis. + If info_axis is a MultiIndex, its first level values are used. + """ + additions = set( + [c for c in self._info_axis.unique(level=0)[:100] + if isinstance(c, string_types) and isidentifier(c)]) return super(NDFrame, self)._dir_additions().union(additions) @property From 4ea43bff26d6d5c0a1071c1359e4b55f160e0a34 Mon Sep 17 00:00:00 2001 From: Martin Journois Date: Sat, 13 May 2017 10:26:40 +0200 Subject: [PATCH 2/6] TST: Add test on DataFrame columns auto-completion --- pandas/tests/frame/test_api.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 7e952a87688bc..91fe7f99ca681 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -128,6 +128,24 @@ def test_column_contains_typeerror(self): except TypeError: pass + def test_tab_completion(self): + # DataFrame whose columns are identifiers shall have them in __dir__. 
+ df = pd.DataFrame([list('abcd'), list('efgh')], columns=list('ABCD')) + for key in list('ABCD'): + assert key in dir(df) + assert isinstance(df.__getitem__('A'), pd.Series) + + # DataFrame whose first-level columns are identifiers shall have + # them in __dir__. + df = pd.DataFrame( + [list('abcd'), list('efgh')], + columns=pd.MultiIndex.from_tuples(list(zip('ABCD', 'EFGH')))) + for key in list('ABCD'): + assert key in dir(df) + for key in list('EFGH'): + assert key not in dir(df) + assert isinstance(df.__getitem__('A'), pd.DataFrame) + def test_not_hashable(self): df = self.klass([1]) pytest.raises(TypeError, hash, df) From bee67d16268670726d0384e3bebe03a19b021cfd Mon Sep 17 00:00:00 2001 From: Martin Journois Date: Fri, 1 Dec 2017 10:21:48 +0100 Subject: [PATCH 3/6] TST: ADD test on Series index auto-completion --- pandas/tests/series/test_api.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 2898ace27f535..617ca2199f588 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -10,7 +10,7 @@ from pandas import Index, Series, DataFrame, date_range from pandas.core.indexes.datetimes import Timestamp -from pandas.compat import range +from pandas.compat import range, lzip, isidentifier, string_types from pandas import (compat, Categorical, period_range, timedelta_range, DatetimeIndex, PeriodIndex, TimedeltaIndex) import pandas.io.formats.printing as printing @@ -250,6 +250,33 @@ def get_dir(s): results = get_dir(s) tm.assert_almost_equal(results, list(sorted(set(ok_for_cat)))) + @pytest.mark.parametrize("index", [ + tm.makeUnicodeIndex(10), + tm.makeStringIndex(10), + tm.makeCategoricalIndex(10), + Index(['foo', 'bar', 'baz'] * 2), + tm.makeDateIndex(10), + tm.makePeriodIndex(10), + tm.makeTimedeltaIndex(10), + tm.makeIntIndex(10), + tm.makeUIntIndex(10), + tm.makeIntIndex(10), + tm.makeFloatIndex(10), + Index([True, 
False]), + Index(['a{}'.format(i) for i in range(101)]), + pd.MultiIndex.from_tuples(lzip('ABCD', 'EFGH')), + pd.MultiIndex.from_tuples(lzip([0, 1, 2, 3], 'EFGH')), ]) + def test_index_tab_completion(self, index): + # dir contains string-like values of the Index. + s = pd.Series(index=index) + dir_s = dir(s) + for i, x in enumerate(s.index.unique(level=0)): + if i < 100: + assert (not isinstance(x, string_types) or + not isidentifier(x) or x in dir_s) + else: + assert x not in dir_s + def test_not_hashable(self): s_empty = Series() s = Series([1]) From edb184aede7c9b1512aaa2ef82fbe5f00fc6098f Mon Sep 17 00:00:00 2001 From: Martin Journois Date: Tue, 5 Dec 2017 18:05:23 +0100 Subject: [PATCH 4/6] DOC: Update whatsnew about NDFrame._dir_additions enhancements (#16326, #18587) --- doc/source/whatsnew/v0.22.0.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index c3a0e3599a0f9..1c346d8ed6f8e 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -135,7 +135,7 @@ Other Enhancements - Improved wording of ``ValueError`` raised in :func:`read_csv` when the ``usecols`` argument cannot match all columns. (:issue:`17301`) - :func:`DataFrame.corrwith` now silently drops non-numeric columns when passed a Series. Before, an exception was raised (:issue:`18570`). - :class:`IntervalIndex` now supports time zone aware ``Interval`` objects (:issue:`18537`, :issue:`18538`) - +- :func:`Series` / :func:`DataFrame` tab completion also returns identifiers in the first level of a :func:`MultiIndex`. (:issue:`16326`) .. 
_whatsnew_0220.api_breaking: @@ -230,6 +230,7 @@ Performance Improvements - Improved performance of :func:`Series.dt.date` and :func:`DatetimeIndex.date` (:issue:`18058`) - Improved performance of :func:`IntervalIndex.symmetric_difference()` (:issue:`18475`) - Improved performance of ``DatetimeIndex`` and ``Series`` arithmetic operations with Business-Month and Business-Quarter frequencies (:issue:`18489`) +- :func:`Series` / :func:`DataFrame` tab completion limits to 100 values, for better performance. (:issue:`18587`) .. _whatsnew_0220.docs: From a46e9e9bdc2a4a169b0f74a59fabf3ab87136cfb Mon Sep 17 00:00:00 2001 From: Martin Journois Date: Wed, 6 Dec 2017 15:11:06 +0100 Subject: [PATCH 5/6] PERF: Add ASV on Series dir --- asv_bench/benchmarks/series_methods.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index 81c43f7bc975f..0ce003d1a9277 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -177,3 +177,13 @@ def time_value_counts_float64(self): def time_value_counts_strings(self): self.s.value_counts() + + +class series_dir(object): + goal_time = 0.2 + + def setup(self): + self.s = Series(index=tm.makeStringIndex(10000)) + + def time_dir_strings(self): + dir(self.s) From 4ee5b9fa793fba46b9b10b05f802b38ea7641b03 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 11 Dec 2017 06:21:43 -0500 Subject: [PATCH 6/6] fix whatsnew --- doc/source/whatsnew/v0.22.0.txt | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 76195377ba0b9..2ea44722d343d 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -135,12 +135,9 @@ Other Enhancements - Improved wording of ``ValueError`` raised in :func:`read_csv` when the ``usecols`` argument cannot match all columns. 
(:issue:`17301`) - :func:`DataFrame.corrwith` now silently drops non-numeric columns when passed a Series. Before, an exception was raised (:issue:`18570`). - :class:`IntervalIndex` now supports time zone aware ``Interval`` objects (:issue:`18537`, :issue:`18538`) -<<<<<<< HEAD - :func:`Series` / :func:`DataFrame` tab completion also returns identifiers in the first level of a :func:`MultiIndex`. (:issue:`16326`) -======= - :func:`read_excel()` has gained the ``nrows`` parameter (:issue:`16645`) - :func:``DataFrame.to_json`` and ``Series.to_json`` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`) ->>>>>>> master .. _whatsnew_0220.api_breaking: @@ -236,10 +233,10 @@ Performance Improvements - The overriden ``Timedelta`` properties of days, seconds and microseconds have been removed, leveraging their built-in Python versions instead (:issue:`18242`) - ``Series`` construction will reduce the number of copies made of the input data in certain cases (:issue:`17449`) - Improved performance of :func:`Series.dt.date` and :func:`DatetimeIndex.date` (:issue:`18058`) +- Improved performance of :func:`Series.dt.time` and :func:`DatetimeIndex.time` (:issue:`18461`) - Improved performance of :func:`IntervalIndex.symmetric_difference()` (:issue:`18475`) - Improved performance of ``DatetimeIndex`` and ``Series`` arithmetic operations with Business-Month and Business-Quarter frequencies (:issue:`18489`) - :func:`Series` / :func:`DataFrame` tab completion limits to 100 values, for better performance. (:issue:`18587`) -- Improved performance of :func:`Series.dt.time` and :func:`DatetimeIndex.time` .. _whatsnew_0220.docs: