Skip to content

Commit 7d0a9cd

Browse files
committed
Merge pull request #6202 from jreback/mi_hdf
BUG: correctly select on a multi-index even in the presence of an under-specified column spec (GH6169)
2 parents 91a2693 + f199e9e commit 7d0a9cd

File tree

3 files changed

+47
-8
lines changed

3 files changed

+47
-8
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,8 @@ Bug Fixes
175175
- Bug in ``HDFStore`` on appending a dataframe with multi-indexed columns to
176176
an existing table (:issue:`6167`)
177177
- Consistency with dtypes in setting an empty DataFrame (:issue:`6171`)
178+
- Bug in selecting on a multi-index ``HDFStore`` even in the presence of an
179+
under-specified column spec (:issue:`6169`)
178180

179181
pandas 0.13.0
180182
-------------

pandas/io/pytables.py

+15-8
Original file line numberDiff line numberDiff line change
@@ -3289,6 +3289,12 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
32893289
def process_axes(self, obj, columns=None):
32903290
""" process axes filters """
32913291

3292+
# make sure to include levels if we have them
3293+
if columns is not None and self.is_multi_index:
3294+
for n in self.levels:
3295+
if n not in columns:
3296+
columns.insert(0, n)
3297+
32923298
# reorder by any non_index_axes & limit to the select columns
32933299
for axis, labels in self.non_index_axes:
32943300
obj = _reindex_axis(obj, axis, labels, columns)
@@ -3305,6 +3311,12 @@ def process_filter(field, filt):
33053311

33063312
# see if the field is the name of an axis
33073313
if field == axis_name:
3314+
3315+
# if we have a multi-index, then need to include
3316+
# the levels
3317+
if self.is_multi_index:
3318+
filt = filt + Index(self.levels)
3319+
33083320
takers = op(axis_values, filt)
33093321
return obj.ix._getitem_axis(takers,
33103322
axis=axis_number)
@@ -3951,13 +3963,9 @@ def write(self, obj, data_columns=None, **kwargs):
39513963
return super(AppendableMultiFrameTable, self).write(
39523964
obj=obj, data_columns=data_columns, **kwargs)
39533965

3954-
def read(self, columns=None, **kwargs):
3955-
if columns is not None:
3956-
for n in self.levels:
3957-
if n not in columns:
3958-
columns.insert(0, n)
3959-
df = super(AppendableMultiFrameTable, self).read(
3960-
columns=columns, **kwargs)
3966+
def read(self, **kwargs):
3967+
3968+
df = super(AppendableMultiFrameTable, self).read(**kwargs)
39613969
df = df.set_index(self.levels)
39623970

39633971
# remove names for 'level_%d'
@@ -3967,7 +3975,6 @@ def read(self, columns=None, **kwargs):
39673975

39683976
return df
39693977

3970-
39713978
class AppendablePanelTable(AppendableTable):
39723979

39733980
""" suppor the new appendable table formats """

pandas/io/tests/test_pytables.py

+30
Original file line numberDiff line numberDiff line change
@@ -1673,6 +1673,36 @@ def make_index(names=None):
16731673
store.append('df',df)
16741674
tm.assert_frame_equal(store.select('df'),df)
16751675

1676+
def test_select_columns_in_where(self):
1677+
1678+
# GH 6169
1679+
# recreate multi-indexes when columns is passed
1680+
# in the `where` argument
1681+
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
1682+
['one', 'two', 'three']],
1683+
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
1684+
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
1685+
names=['foo_name', 'bar_name'])
1686+
1687+
# With a DataFrame
1688+
df = DataFrame(np.random.randn(10, 3), index=index,
1689+
columns=['A', 'B', 'C'])
1690+
1691+
with ensure_clean_store(self.path) as store:
1692+
store.put('df', df, format='table')
1693+
expected = df[['A']]
1694+
1695+
tm.assert_frame_equal(store.select('df', columns=['A']), expected)
1696+
1697+
tm.assert_frame_equal(store.select('df', where="columns=['A']"), expected)
1698+
1699+
# With a Series
1700+
s = Series(np.random.randn(10), index=index,
1701+
name='A')
1702+
with ensure_clean_store(self.path) as store:
1703+
store.put('s', s, format='table')
1704+
tm.assert_series_equal(store.select('s', where="columns=['A']"),s)
1705+
16761706
def test_pass_spec_to_storer(self):
16771707

16781708
df = tm.makeDataFrame()

0 commit comments

Comments
 (0)