From 03098012f8544d31825af76c60e47a003c945ac9 Mon Sep 17 00:00:00 2001 From: Guillaume Gay Date: Fri, 31 Jan 2014 10:47:47 +0100 Subject: [PATCH] BUG/TST raise a more detailed error when GH6169 occurs, added a test Raise a detailed error when a `columns` argument is passed through 'where' to select a MultiIndexed DataFrame from an HDF store. Wrote a test showcasing the bug modified: pandas/io/pytables.py modified: pandas/io/tests/test_pytables.py making it work --- pandas/io/pytables.py | 12 ++++++++++-- pandas/io/tests/test_pytables.py | 27 +++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index bb487f5102e0a..9d1ce4f4b82bc 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3958,8 +3958,16 @@ def read(self, columns=None, **kwargs): columns.insert(0, n) df = super(AppendableMultiFrameTable, self).read( columns=columns, **kwargs) - df = df.set_index(self.levels) - + try: + df = df.set_index(self.levels) + except KeyError: + if kwargs.get('where') is not None and 'columns' in kwargs.get('where').expr: + raise KeyError( + "Indexes columns were not retrieved because you passed " + "a `where` argument containing columns specification. 
" + "(see http://github.com/pydata/pandas/issues/6169), try passing " + "the columns specification through the `columns` keyword instead" + ) # remove names for 'level_%d' df.index = df.index.set_names([ None if self._re_levels.search(l) else l for l in df.index.names diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 9c56ee468f6ac..29f536b3bf5d9 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1673,6 +1673,33 @@ def make_index(names=None): store.append('df',df) tm.assert_frame_equal(store.select('df'),df) + def test_select_columns_in_where(self): + + # GH 6169 + # recreate multi-indexes when columns is passed + # in the `where` argument + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], + ['one', 'two', 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['foo_name', 'bar_name']) + + # With a DataFrame + df = DataFrame(np.random.randn(10, 3), index=index, + columns=['A', 'B', 'C']) + + with ensure_clean_store(self.path) as store: + store.put('df', df, format='table') + tm.assert_frame_equal(store.select('df', where="columns=['A']"),df['A'], + check_index_type=True,check_column_type=True) + # With a Series + s = Series(np.random.randn(10), index=index, + name='A') + with ensure_clean_store(self.path) as store: + store.put('s', s) + tm.assert_frame_equal(store.select('s', where="columns=['A']"),s, + check_index_type=True,check_column_type=True) + def test_pass_spec_to_storer(self): df = tm.makeDataFrame()