Skip to content

Commit f199e9e

Browse files
committed
BUG: correctly select on a multi-index even in the presence of an under-specified columns spec (GH6169)
1 parent 1d63bb2 commit f199e9e

File tree

3 files changed

+31
-27
lines changed

3 files changed

+31
-27
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,8 @@ Bug Fixes
175175
- Bug in ``HDFStore`` on appending a dataframe with multi-indexed columns to
176176
an existing table (:issue:`6167`)
177177
- Consistency with dtypes in setting an empty DataFrame (:issue:`6171`)
178+
- Bug in selecting on a multi-index ``HDFStore`` even in the presence of under
179+
specified column spec (:issue:`6169`)
178180

179181
pandas 0.13.0
180182
-------------

pandas/io/pytables.py

+17-18
Original file line numberDiff line numberDiff line change
@@ -3289,6 +3289,12 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
32893289
def process_axes(self, obj, columns=None):
32903290
""" process axes filters """
32913291

3292+
# make sure to include levels if we have them
3293+
if columns is not None and self.is_multi_index:
3294+
for n in self.levels:
3295+
if n not in columns:
3296+
columns.insert(0, n)
3297+
32923298
# reorder by any non_index_axes & limit to the select columns
32933299
for axis, labels in self.non_index_axes:
32943300
obj = _reindex_axis(obj, axis, labels, columns)
@@ -3305,6 +3311,12 @@ def process_filter(field, filt):
33053311

33063312
# see if the field is the name of an axis
33073313
if field == axis_name:
3314+
3315+
# if we have a multi-index, then need to include
3316+
# the levels
3317+
if self.is_multi_index:
3318+
filt = filt + Index(self.levels)
3319+
33083320
takers = op(axis_values, filt)
33093321
return obj.ix._getitem_axis(takers,
33103322
axis=axis_number)
@@ -3951,31 +3963,18 @@ def write(self, obj, data_columns=None, **kwargs):
39513963
return super(AppendableMultiFrameTable, self).write(
39523964
obj=obj, data_columns=data_columns, **kwargs)
39533965

3954-
def read(self, columns=None, **kwargs):
3955-
if columns is not None:
3956-
for n in self.levels:
3957-
if n not in columns:
3958-
columns.insert(0, n)
3959-
df = super(AppendableMultiFrameTable, self).read(
3960-
columns=columns, **kwargs)
3961-
try:
3962-
df = df.set_index(self.levels)
3963-
except KeyError:
3964-
if kwargs.get('where') is not None and 'columns' in kwargs.get('where').expr:
3965-
raise KeyError(
3966-
"Indexes columns were not retrieved because you passed "
3967-
"a `where` argument containing columns specification. "
3968-
"(see http://github.com/pydata/pandas/issues/6169), try passing "
3969-
"the columns specification through the `columns` keyword instead"
3970-
)
3966+
def read(self, **kwargs):
3967+
3968+
df = super(AppendableMultiFrameTable, self).read(**kwargs)
3969+
df = df.set_index(self.levels)
3970+
39713971
# remove names for 'level_%d'
39723972
df.index = df.index.set_names([
39733973
None if self._re_levels.search(l) else l for l in df.index.names
39743974
])
39753975

39763976
return df
39773977

3978-
39793978
class AppendablePanelTable(AppendableTable):
39803979

39813980
""" suppor the new appendable table formats """

pandas/io/tests/test_pytables.py

+12-9
Original file line numberDiff line numberDiff line change
@@ -1674,7 +1674,7 @@ def make_index(names=None):
16741674
tm.assert_frame_equal(store.select('df'),df)
16751675

16761676
def test_select_columns_in_where(self):
1677-
1677+
16781678
# GH 6169
16791679
# recreate multi-indexes when columns is passed
16801680
# in the `where` argument
@@ -1687,19 +1687,22 @@ def test_select_columns_in_where(self):
16871687
# With a DataFrame
16881688
df = DataFrame(np.random.randn(10, 3), index=index,
16891689
columns=['A', 'B', 'C'])
1690-
1690+
16911691
with ensure_clean_store(self.path) as store:
16921692
store.put('df', df, format='table')
1693-
tm.assert_frame_equal(store.select('df', where="columns=['A']"),df['A'],
1694-
check_index_type=True,check_column_type=True)
1695-
# With a Serie
1693+
expected = df[['A']]
1694+
1695+
tm.assert_frame_equal(store.select('df', columns=['A']), expected)
1696+
1697+
tm.assert_frame_equal(store.select('df', where="columns=['A']"), expected)
1698+
1699+
# With a Series
16961700
s = Series(np.random.randn(10), index=index,
16971701
name='A')
16981702
with ensure_clean_store(self.path) as store:
1699-
store.put('s', s)
1700-
tm.assert_frame_equal(store.select('s', where="columns=['A']"),s,
1701-
check_index_type=True,check_column_type=True)
1702-
1703+
store.put('s', s, format='table')
1704+
tm.assert_series_equal(store.select('s', where="columns=['A']"),s)
1705+
17031706
def test_pass_spec_to_storer(self):
17041707

17051708
df = tm.makeDataFrame()

0 commit comments

Comments
 (0)