From 1d63bb21df8ca7222cda3440ea5401cb0c0402a0 Mon Sep 17 00:00:00 2001 From: Guillaume Gay Date: Fri, 31 Jan 2014 10:47:47 +0100 Subject: [PATCH 1/2] BUG/TST raise a more detailed error when GH6169 occurs, added a test Raise a detailed error when a `columns` argument is passed through 'where' to select a multiIndexed Dataframe from an HDF store. Wrote a test showcasing the bug modified: pandas/io/pytables.py modified: pandas/io/tests/test_pytables.py making it work --- pandas/io/pytables.py | 12 ++++++++++-- pandas/io/tests/test_pytables.py | 27 +++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index bb487f5102e0a..9d1ce4f4b82bc 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3958,8 +3958,16 @@ def read(self, columns=None, **kwargs): columns.insert(0, n) df = super(AppendableMultiFrameTable, self).read( columns=columns, **kwargs) - df = df.set_index(self.levels) - + try: + df = df.set_index(self.levels) + except KeyError: + if kwargs.get('where') is not None and 'columns' in kwargs.get('where').expr: + raise KeyError( + "Indexes columns were not retrieved because you passed " + "a `where` argument containing columns specification. 
" + "(see http://github.com/pydata/pandas/issues/6169), try passing " + "the columns specification through the `columns` keyword instead" + ) # remove names for 'level_%d' df.index = df.index.set_names([ None if self._re_levels.search(l) else l for l in df.index.names diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 9c56ee468f6ac..29f536b3bf5d9 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1673,6 +1673,33 @@ def make_index(names=None): store.append('df',df) tm.assert_frame_equal(store.select('df'),df) + def test_select_columns_in_where(self): + + # GH 6169 + # recreate multi-indexes when columns is passed + # in the `where` argument + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], + ['one', 'two', 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['foo_name', 'bar_name']) + + # With a DataFrame + df = DataFrame(np.random.randn(10, 3), index=index, + columns=['A', 'B', 'C']) + + with ensure_clean_store(self.path) as store: + store.put('df', df, format='table') + tm.assert_frame_equal(store.select('df', where="columns=['A']"),df['A'], + check_index_type=True,check_column_type=True) + # With a Serie + s = Series(np.random.randn(10), index=index, + name='A') + with ensure_clean_store(self.path) as store: + store.put('s', s) + tm.assert_frame_equal(store.select('s', where="columns=['A']"),s, + check_index_type=True,check_column_type=True) + def test_pass_spec_to_storer(self): df = tm.makeDataFrame() From f199e9e2220f07ae539ec0bb37b108dcba49fd5c Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 31 Jan 2014 07:58:47 -0500 Subject: [PATCH 2/2] BUG: correctly select on a multi-index even in the presence of an under-specified columns spec (GH6169) --- doc/source/release.rst | 2 ++ pandas/io/pytables.py | 35 ++++++++++++++++---------------- pandas/io/tests/test_pytables.py | 21 +++++++++++-------- 3 files changed, 31 insertions(+), 27 
deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index d701d1dacc16d..b9115c79354a6 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -175,6 +175,8 @@ Bug Fixes - Bug in ``HDFStore`` on appending a dataframe with multi-indexed columns to an existing table (:issue:`6167`) - Consistency with dtypes in setting an empty DataFrame (:issue:`6171`) + - Bug in selecting on a multi-index ``HDFStore`` even in the presence of an + under-specified column spec (:issue:`6169`) pandas 0.13.0 ------------- diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9d1ce4f4b82bc..8bae83dce7546 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3289,6 +3289,12 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, def process_axes(self, obj, columns=None): """ process axes filters """ + # make sure to include levels if we have them + if columns is not None and self.is_multi_index: + for n in self.levels: + if n not in columns: + columns.insert(0, n) + # reorder by any non_index_axes & limit to the select columns for axis, labels in self.non_index_axes: obj = _reindex_axis(obj, axis, labels, columns) @@ -3305,6 +3311,12 @@ def process_filter(field, filt): # see if the field is the name of an axis if field == axis_name: + + # if we have a multi-index, then need to include + # the levels + if self.is_multi_index: + filt = filt + Index(self.levels) + takers = op(axis_values, filt) return obj.ix._getitem_axis(takers, axis=axis_number) @@ -3951,23 +3963,11 @@ def write(self, obj, data_columns=None, **kwargs): return super(AppendableMultiFrameTable, self).write( obj=obj, data_columns=data_columns, **kwargs) - def read(self, columns=None, **kwargs): - if columns is not None: - for n in self.levels: - if n not in columns: - columns.insert(0, n) - df = super(AppendableMultiFrameTable, self).read( - columns=columns, **kwargs) - try: - df = df.set_index(self.levels) - except KeyError: - if kwargs.get('where') is not None 
and 'columns' in kwargs.get('where').expr: - raise KeyError( - "Indexes columns were not retrieved because you passed " - "a `where` argument containing columns specification. " - "(see http://github.com/pydata/pandas/issues/6169), try passing " - "the columns specification through the `columns` keyword instead" - ) + def read(self, **kwargs): + + df = super(AppendableMultiFrameTable, self).read(**kwargs) + df = df.set_index(self.levels) + # remove names for 'level_%d' df.index = df.index.set_names([ None if self._re_levels.search(l) else l for l in df.index.names @@ -3975,7 +3975,6 @@ def read(self, columns=None, **kwargs): return df - class AppendablePanelTable(AppendableTable): """ suppor the new appendable table formats """ diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 29f536b3bf5d9..dc218b530db64 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1674,7 +1674,7 @@ def make_index(names=None): tm.assert_frame_equal(store.select('df'),df) def test_select_columns_in_where(self): - + # GH 6169 # recreate multi-indexes when columns is passed # in the `where` argument @@ -1687,19 +1687,22 @@ def test_select_columns_in_where(self): # With a DataFrame df = DataFrame(np.random.randn(10, 3), index=index, columns=['A', 'B', 'C']) - + with ensure_clean_store(self.path) as store: store.put('df', df, format='table') - tm.assert_frame_equal(store.select('df', where="columns=['A']"),df['A'], - check_index_type=True,check_column_type=True) - # With a Serie + expected = df[['A']] + + tm.assert_frame_equal(store.select('df', columns=['A']), expected) + + tm.assert_frame_equal(store.select('df', where="columns=['A']"), expected) + + # With a Series s = Series(np.random.randn(10), index=index, name='A') with ensure_clean_store(self.path) as store: - store.put('s', s) - tm.assert_frame_equal(store.select('s', where="columns=['A']"),s, - check_index_type=True,check_column_type=True) - + store.put('s', s, 
format='table') + tm.assert_series_equal(store.select('s', where="columns=['A']"),s) + def test_pass_spec_to_storer(self): df = tm.makeDataFrame()