From fb577b727f21cce4932389ffda84304989ac2b8f Mon Sep 17 00:00:00 2001 From: "Henry S. Harrison" Date: Sun, 21 Feb 2016 02:11:32 -0500 Subject: [PATCH 1/5] ENH: allow index to be referenced by name closes #8162, #10816 --- doc/source/whatsnew/v0.18.0.txt | 1 + pandas/core/frame.py | 25 ++++++++++- pandas/core/generic.py | 25 ++++++++--- pandas/tests/test_generic.py | 75 +++++++++++++++++++++++++++++++++ 4 files changed, 120 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index b3bbc5cf5ef8c..0f0c925b366a5 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -432,6 +432,7 @@ Other enhancements - Added Google ``BigQuery`` service account authentication support, which enables authentication on remote servers. (:issue:`11881`). For further details see :ref:`here <io.bigquery_authentication>` - ``HDFStore`` is now iterable: ``for k in store`` is equivalent to ``for k in store.keys()`` (:issue:`12221`). - The entire codebase has been ``PEP``-ified (:issue:`12096`) +- Index (or index levels, with a MultiIndex) can now be referenced like column names (:issue:`8162`, :issue:`10816`). .. 
_whatsnew_0180.api_breaking: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cd32ff2133cae..2e739c3e20181 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2018,7 +2018,30 @@ def _getitem_array(self, key): indexer = key.nonzero()[0] return self.take(indexer, axis=0, convert=False) else: - indexer = self.ix._convert_to_indexer(key, axis=1) + try: + indexer = self.ix._convert_to_indexer(key, axis=1) + + except KeyError: + if self.index.name in key: + ix_name = self.index.name + ix_ix = key.index(ix_name) + + elif (isinstance(self.index, MultiIndex) and + any(item in self.index.names for item in key)): + for item in key: + if item in self.index.names: + ix_name = item + ix_ix = key.index(item) + + else: + raise + + key.remove(ix_name) + ix_col = self[ix_name] + other_cols = self[key] + other_cols.insert(ix_ix, ix_name, ix_col) + return other_cols + return self.take(indexer, axis=1, convert=True) def _getitem_multilevel(self, key): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 14d788fdded7e..f32365a89f27c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1306,8 +1306,20 @@ def _get_item_cache(self, item): cache = self._item_cache res = cache.get(item) if res is None: - values = self._data.get(item) - res = self._box_item_values(item, values) + try: + values = self._data.get(item) + res = self._box_item_values(item, values) + except KeyError: + if hasattr(self, 'index') and self.index.name == item: + res = self.index.to_series() + + elif (isinstance(self.index, MultiIndex) and + item in self.index.names): + res = pd.Series(self.index.get_level_values(item).values, + index=self.index, name=item) + + else: + raise cache[item] = res res._set_as_cached(item, self) @@ -2623,10 +2635,13 @@ def __getattr__(self, name): if (name in self._internal_names_set or name in self._metadata or name in self._accessors): return object.__getattribute__(self, name) - else: - if name in self._info_axis: + elif ( + name in 
self._info_axis or + name == self.index.name or + (isinstance(self.index, MultiIndex) and name in self.index.names) + ): return self[name] - return object.__getattribute__(self, name) + return object.__getattribute__(self, name) def __setattr__(self, name, value): """After regular attribute access, try setting the name diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 4c7510783eda0..b80522e2f76f0 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -1853,6 +1853,81 @@ def test_to_xarray(self): expected, check_index_type=False) + def test_getitem_index(self): + # GH8162 + idx = pd.Index(list('abc'), name='idx') + df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=idx) + expected = pd.Series(['a', 'b', 'c'], index=idx, name='idx') + + assert_series_equal(df['idx'], expected) + assert_series_equal(df.idx, expected) + + def test_getitem_index_listlike(self): + idx = pd.Index(list('abc'), name='idx') + df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=idx) + assert_frame_equal( + df[['idx', 'B']], + pd.DataFrame([ + ['a', 4], + ['b', 5], + ['c', 6], + ], + columns=['idx', 'B'], + index=idx) + ) + assert_frame_equal( + df[['idx', 'A', 'B']], + pd.DataFrame([ + ['a', 1, 4], + ['b', 2, 5], + ['c', 3, 6], + ], + columns=['idx', 'A', 'B'], + index=idx) + ) + + def test_getitem_multiindex_level(self): + # GH10816 + idx = pd.MultiIndex.from_product([list('abc'), list('fg')], + names=['lev0', 'lev1']) + df = pd.DataFrame({'A': range(6), 'B': range(10, 16)}, index=idx) + expected = pd.Series(list('aabbcc'), index=idx, name='lev0') + + assert_series_equal(df['lev0'], expected) + assert_series_equal(df.lev0, expected) + + def test_getitem_multiindex_level_listlike(self): + idx = pd.MultiIndex.from_product([list('abc'), list('fg')], + names=['lev0', 'lev1']) + df = pd.DataFrame({'A': range(6), 'B': range(10, 16)}, index=idx) + assert_frame_equal( + df[['A', 'lev1']], + pd.DataFrame([ + [0, 'f'], + [1, 'g'], + 
[2, 'f'], + [3, 'g'], + [4, 'f'], + [5, 'g'], + ], + columns=['A', 'lev1'], + index=idx) + ) + + assert_frame_equal( + df[['A', 'B', 'lev1', 'lev0']], + pd.DataFrame([ + [0, 10, 'f', 'a'], + [1, 11, 'g', 'a'], + [2, 12, 'f', 'b'], + [3, 13, 'g', 'b'], + [4, 14, 'f', 'c'], + [5, 15, 'g', 'c'], + ], + columns=['A', 'B', 'lev1', 'lev0'], + index=idx) + ) + class TestPanel(tm.TestCase, Generic): _typ = Panel From 4b3469d8a5c8377e20904253d7b03f31ace06d73 Mon Sep 17 00:00:00 2001 From: "Henry S. Harrison" Date: Sun, 21 Feb 2016 11:42:31 -0500 Subject: [PATCH 2/5] fix panels and cleanup --- pandas/core/frame.py | 11 ++++------- pandas/core/generic.py | 4 ++-- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2e739c3e20181..efd639a8704a6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2025,14 +2025,11 @@ def _getitem_array(self, key): if self.index.name in key: ix_name = self.index.name ix_ix = key.index(ix_name) - - elif (isinstance(self.index, MultiIndex) and + elif (hasattr(self, 'index') and + isinstance(self.index, MultiIndex) and any(item in self.index.names for item in key)): - for item in key: - if item in self.index.names: - ix_name = item - ix_ix = key.index(item) - + ix_ix, ix_name = next((i, k) for i, k in enumerate(key) + if k in self.index.names) else: raise diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f32365a89f27c..9bd035e9ed25a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1313,11 +1313,11 @@ def _get_item_cache(self, item): if hasattr(self, 'index') and self.index.name == item: res = self.index.to_series() - elif (isinstance(self.index, MultiIndex) and + elif (hasattr(self, 'index') and + isinstance(self.index, MultiIndex) and item in self.index.names): res = pd.Series(self.index.get_level_values(item).values, index=self.index, name=item) - else: raise cache[item] = res From 358c0fc29d39f9287c5cd82e64602ee7738b9eed Mon Sep 17 
00:00:00 2001 From: "Henry S. Harrison" Date: Sun, 21 Feb 2016 12:20:53 -0500 Subject: [PATCH 3/5] refactor --- pandas/core/frame.py | 31 ++++++++++++++----------------- pandas/core/generic.py | 14 ++++++-------- 2 files changed, 20 insertions(+), 25 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index efd639a8704a6..11eb8ae4afd1a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2020,24 +2020,11 @@ def _getitem_array(self, key): else: try: indexer = self.ix._convert_to_indexer(key, axis=1) - except KeyError: - if self.index.name in key: - ix_name = self.index.name - ix_ix = key.index(ix_name) - elif (hasattr(self, 'index') and - isinstance(self.index, MultiIndex) and - any(item in self.index.names for item in key)): - ix_ix, ix_name = next((i, k) for i, k in enumerate(key) - if k in self.index.names) - else: - raise - - key.remove(ix_name) - ix_col = self[ix_name] - other_cols = self[key] - other_cols.insert(ix_ix, ix_name, ix_col) - return other_cols + if (hasattr(self, 'index') and + any(item in self.index.names for item in key)): + return self._getitem_array_with_index_name(key) + raise return self.take(indexer, axis=1, convert=True) @@ -2074,6 +2061,16 @@ def _getitem_frame(self, key): raise ValueError('Must pass DataFrame with boolean values only') return self.where(key) + def _getitem_array_with_index_name(self, key): + ix_ix, ix_name = next((i, k) for i, k in enumerate(key) + if k in self.index.names) + key.remove(ix_name) + ix_col = self[ix_name] + result = self[key] + result.insert(ix_ix, ix_name, ix_col) + return result + + def query(self, expr, inplace=False, **kwargs): """Query the columns of a frame with a boolean expression. 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9bd035e9ed25a..2a85cf1980cf4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1310,14 +1310,8 @@ def _get_item_cache(self, item): values = self._data.get(item) res = self._box_item_values(item, values) except KeyError: - if hasattr(self, 'index') and self.index.name == item: - res = self.index.to_series() - - elif (hasattr(self, 'index') and - isinstance(self.index, MultiIndex) and - item in self.index.names): - res = pd.Series(self.index.get_level_values(item).values, - index=self.index, name=item) + if hasattr(self, 'index') and item in self.index.names: + res = self._get_item_index_name(item) else: raise cache[item] = res @@ -1327,6 +1321,10 @@ def _get_item_cache(self, item): res.is_copy = self.is_copy return res + def _get_item_index_name(self, item): + return pd.Series(self.index.get_level_values(item), + index=self.index, name=item) + def _set_as_cached(self, item, cacher): """Set the _cacher attribute on the calling object with a weakref to cacher. From ee7a8d9a0e717400576fc97a7d6d4949a72594ea Mon Sep 17 00:00:00 2001 From: "Henry S. Harrison" Date: Sun, 21 Feb 2016 12:21:28 -0500 Subject: [PATCH 4/5] pep8 --- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 11eb8ae4afd1a..5ddceeee7e548 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2070,7 +2070,6 @@ def _getitem_array_with_index_name(self, key): result.insert(ix_ix, ix_name, ix_col) return result - def query(self, expr, inplace=False, **kwargs): """Query the columns of a frame with a boolean expression. From 6db4f4b18ba21d61cfa301ba2fe1665c4749316e Mon Sep 17 00:00:00 2001 From: "Henry S. 
Harrison" Date: Sun, 21 Feb 2016 12:28:18 -0500 Subject: [PATCH 5/5] missed a spot --- pandas/core/generic.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2a85cf1980cf4..bd9cfc28ebc33 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2633,11 +2633,7 @@ def __getattr__(self, name): if (name in self._internal_names_set or name in self._metadata or name in self._accessors): return object.__getattribute__(self, name) - elif ( - name in self._info_axis or - name == self.index.name or - (isinstance(self.index, MultiIndex) and name in self.index.names) - ): + elif name in self._info_axis or name in self.index.names: return self[name] return object.__getattribute__(self, name)