From 1d63bb21df8ca7222cda3440ea5401cb0c0402a0 Mon Sep 17 00:00:00 2001
From: Guillaume Gay <guillaume@mitotic-machine.org>
Date: Fri, 31 Jan 2014 10:47:47 +0100
Subject: [PATCH 1/2] BUG/TST raise a more detailed error when GH6169 occurs,
 added a test

Raise a detailed error when a `columns` argument is passed through `where` to
select a multi-indexed DataFrame from an HDF store. Wrote a test
showcasing the bug.

	modified:   pandas/io/pytables.py
	modified:   pandas/io/tests/test_pytables.py

making it work
---
 pandas/io/pytables.py            | 12 ++++++++++--
 pandas/io/tests/test_pytables.py | 27 +++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index bb487f5102e0a..9d1ce4f4b82bc 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -3958,8 +3958,16 @@ def read(self, columns=None, **kwargs):
                     columns.insert(0, n)
         df = super(AppendableMultiFrameTable, self).read(
             columns=columns, **kwargs)
-        df = df.set_index(self.levels)
-
+        try:
+            df = df.set_index(self.levels)
+        except KeyError:
+            if kwargs.get('where') is not None and 'columns' in kwargs.get('where').expr:
+                raise KeyError(
+                "Indexes columns were not retrieved because you passed "
+                "a `where` argument  containing columns specification. "
+                "(see http://github.com/pydata/pandas/issues/6169), try passing "
+                "the columns specification through the `columns` keyword instead"
+                )
         # remove names for 'level_%d'
         df.index = df.index.set_names([
             None if self._re_levels.search(l) else l for l in df.index.names
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index 9c56ee468f6ac..29f536b3bf5d9 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -1673,6 +1673,33 @@ def make_index(names=None):
             store.append('df',df)
             tm.assert_frame_equal(store.select('df'),df)
 
+    def test_select_columns_in_where(self):
+        
+        # GH 6169
+        # recreate multi-indexes when columns is passed
+        # in the `where` argument
+        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
+                                   ['one', 'two', 'three']],
+                           labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
+                                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
+                           names=['foo_name', 'bar_name'])
+
+        # With a DataFrame
+        df = DataFrame(np.random.randn(10, 3), index=index,
+                       columns=['A', 'B', 'C'])
+        
+        with ensure_clean_store(self.path) as store:
+            store.put('df', df, format='table')
+            tm.assert_frame_equal(store.select('df', where="columns=['A']"),df['A'],
+                                  check_index_type=True,check_column_type=True)
+        # With a Serie
+        s = Series(np.random.randn(10), index=index,
+                   name='A')
+        with ensure_clean_store(self.path) as store:
+            store.put('s', s)
+            tm.assert_frame_equal(store.select('s', where="columns=['A']"),s,
+                                  check_index_type=True,check_column_type=True)
+            
     def test_pass_spec_to_storer(self):
 
         df = tm.makeDataFrame()

From f199e9e2220f07ae539ec0bb37b108dcba49fd5c Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Fri, 31 Jan 2014 07:58:47 -0500
Subject: [PATCH 2/2] BUG: correctly select on a multi-index even in the
 presence of an under-specified columns spec (GH6169)

---
 doc/source/release.rst           |  2 ++
 pandas/io/pytables.py            | 35 ++++++++++++++++----------------
 pandas/io/tests/test_pytables.py | 21 +++++++++++--------
 3 files changed, 31 insertions(+), 27 deletions(-)

diff --git a/doc/source/release.rst b/doc/source/release.rst
index d701d1dacc16d..b9115c79354a6 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -175,6 +175,8 @@ Bug Fixes
   - Bug in ``HDFStore`` on appending a dataframe with multi-indexed columns to
     an existing table (:issue:`6167`)
   - Consistency with dtypes in setting an empty DataFrame (:issue:`6171`)
+  - Bug in selecting on a multi-index ``HDFStore`` even in the presence of an
+    under-specified column spec (:issue:`6169`)
 
 pandas 0.13.0
 -------------
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 9d1ce4f4b82bc..8bae83dce7546 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -3289,6 +3289,12 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
     def process_axes(self, obj, columns=None):
         """ process axes filters """
 
+        # make sure to include levels if we have them
+        if columns is not None and self.is_multi_index:
+            for n in self.levels:
+                if n not in columns:
+                    columns.insert(0, n)
+
         # reorder by any non_index_axes & limit to the select columns
         for axis, labels in self.non_index_axes:
             obj = _reindex_axis(obj, axis, labels, columns)
@@ -3305,6 +3311,12 @@ def process_filter(field, filt):
 
                         # see if the field is the name of an axis
                         if field == axis_name:
+
+                            # if we have a multi-index, then need to include
+                            # the levels
+                            if self.is_multi_index:
+                                filt = filt + Index(self.levels)
+
                             takers = op(axis_values, filt)
                             return obj.ix._getitem_axis(takers,
                                                         axis=axis_number)
@@ -3951,23 +3963,11 @@ def write(self, obj, data_columns=None, **kwargs):
         return super(AppendableMultiFrameTable, self).write(
             obj=obj, data_columns=data_columns, **kwargs)
 
-    def read(self, columns=None, **kwargs):
-        if columns is not None:
-            for n in self.levels:
-                if n not in columns:
-                    columns.insert(0, n)
-        df = super(AppendableMultiFrameTable, self).read(
-            columns=columns, **kwargs)
-        try:
-            df = df.set_index(self.levels)
-        except KeyError:
-            if kwargs.get('where') is not None and 'columns' in kwargs.get('where').expr:
-                raise KeyError(
-                "Indexes columns were not retrieved because you passed "
-                "a `where` argument  containing columns specification. "
-                "(see http://github.com/pydata/pandas/issues/6169), try passing "
-                "the columns specification through the `columns` keyword instead"
-                )
+    def read(self, **kwargs):
+
+        df = super(AppendableMultiFrameTable, self).read(**kwargs)
+        df = df.set_index(self.levels)
+
         # remove names for 'level_%d'
         df.index = df.index.set_names([
             None if self._re_levels.search(l) else l for l in df.index.names
@@ -3975,7 +3975,6 @@ def read(self, columns=None, **kwargs):
 
         return df
 
-
 class AppendablePanelTable(AppendableTable):
 
     """ suppor the new appendable table formats """
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index 29f536b3bf5d9..dc218b530db64 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -1674,7 +1674,7 @@ def make_index(names=None):
             tm.assert_frame_equal(store.select('df'),df)
 
     def test_select_columns_in_where(self):
-        
+
         # GH 6169
         # recreate multi-indexes when columns is passed
         # in the `where` argument
@@ -1687,19 +1687,22 @@ def test_select_columns_in_where(self):
         # With a DataFrame
         df = DataFrame(np.random.randn(10, 3), index=index,
                        columns=['A', 'B', 'C'])
-        
+
         with ensure_clean_store(self.path) as store:
             store.put('df', df, format='table')
-            tm.assert_frame_equal(store.select('df', where="columns=['A']"),df['A'],
-                                  check_index_type=True,check_column_type=True)
-        # With a Serie
+            expected = df[['A']]
+
+            tm.assert_frame_equal(store.select('df', columns=['A']), expected)
+
+            tm.assert_frame_equal(store.select('df', where="columns=['A']"), expected)
+
+        # With a Series
         s = Series(np.random.randn(10), index=index,
                    name='A')
         with ensure_clean_store(self.path) as store:
-            store.put('s', s)
-            tm.assert_frame_equal(store.select('s', where="columns=['A']"),s,
-                                  check_index_type=True,check_column_type=True)
-            
+            store.put('s', s, format='table')
+            tm.assert_series_equal(store.select('s', where="columns=['A']"),s)
+
     def test_pass_spec_to_storer(self):
 
         df = tm.makeDataFrame()