Merge pull request #6202 from jreback/mi_hdf

jreback · jreback · commit 7d0a9cd7454c · 2014-01-31T05:31:59.000-08:00
BUG: correctly select on a multi-index even in the presence of an underspecified column spec (GH6169)
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -175,6 +175,8 @@ Bug Fixes
   - Bug in ``HDFStore`` on appending a dataframe with multi-indexed columns to
     an existing table (:issue:`6167`)
   - Consistency with dtypes in setting an empty DataFrame (:issue:`6171`)
+  - Bug in selecting on a multi-index ``HDFStore`` even in the presence of an
+    underspecified column spec (:issue:`6169`)
 
 pandas 0.13.0
 -------------
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -3289,6 +3289,12 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
     def process_axes(self, obj, columns=None):
         """ process axes filters """
 
+        # make sure to include levels if we have them
+        if columns is not None and self.is_multi_index:
+            for n in self.levels:
+                if n not in columns:
+                    columns.insert(0, n)
+
         # reorder by any non_index_axes & limit to the select columns
         for axis, labels in self.non_index_axes:
             obj = _reindex_axis(obj, axis, labels, columns)
@@ -3305,6 +3311,12 @@ def process_filter(field, filt):
 
                         # see if the field is the name of an axis
                         if field == axis_name:
+
+                            # if we have a multi-index, then need to include
+                            # the levels
+                            if self.is_multi_index:
+                                filt = filt + Index(self.levels)
+
                             takers = op(axis_values, filt)
                             return obj.ix._getitem_axis(takers,
                                                         axis=axis_number)
@@ -3951,13 +3963,9 @@ def write(self, obj, data_columns=None, **kwargs):
         return super(AppendableMultiFrameTable, self).write(
             obj=obj, data_columns=data_columns, **kwargs)
 
-    def read(self, columns=None, **kwargs):
-        if columns is not None:
-            for n in self.levels:
-                if n not in columns:
-                    columns.insert(0, n)
-        df = super(AppendableMultiFrameTable, self).read(
-            columns=columns, **kwargs)
+    def read(self, **kwargs):
+
+        df = super(AppendableMultiFrameTable, self).read(**kwargs)
         df = df.set_index(self.levels)
 
         # remove names for 'level_%d'
@@ -3967,7 +3975,6 @@ def read(self, columns=None, **kwargs):
 
         return df
 
-
 class AppendablePanelTable(AppendableTable):
 
     """ suppor the new appendable table formats """
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
@@ -1673,6 +1673,36 @@ def make_index(names=None):
             store.append('df',df)
             tm.assert_frame_equal(store.select('df'),df)
 
+    def test_select_columns_in_where(self):
+
+        # GH 6169
+        # recreate multi-indexes when columns is passed
+        # in the `where` argument
+        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
+                                   ['one', 'two', 'three']],
+                           labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
+                                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
+                           names=['foo_name', 'bar_name'])
+
+        # With a DataFrame
+        df = DataFrame(np.random.randn(10, 3), index=index,
+                       columns=['A', 'B', 'C'])
+
+        with ensure_clean_store(self.path) as store:
+            store.put('df', df, format='table')
+            expected = df[['A']]
+
+            tm.assert_frame_equal(store.select('df', columns=['A']), expected)
+
+            tm.assert_frame_equal(store.select('df', where="columns=['A']"), expected)
+
+        # With a Series
+        s = Series(np.random.randn(10), index=index,
+                   name='A')
+        with ensure_clean_store(self.path) as store:
+            store.put('s', s, format='table')
+            tm.assert_series_equal(store.select('s', where="columns=['A']"),s)
+
     def test_pass_spec_to_storer(self):
 
         df = tm.makeDataFrame()