Skip to content

Commit 1d63bb2

Browse files
Guillaume Gay, jreback
Guillaume Gay authored and jreback committed
BUG/TST raise a more detailed error when GH6169 occurs, added a test
Raise a detailed error when a `columns` argument is passed through `where` to select a MultiIndexed DataFrame from an HDF store. Wrote a test showcasing the bug. Modified: pandas/io/pytables.py. Modified: pandas/io/tests/test_pytables.py. Making it work.
1 parent 91a2693 commit 1d63bb2

File tree

2 files changed

+37
-2
lines changed

2 files changed

+37
-2
lines changed

pandas/io/pytables.py

+10-2
Original file line number | Diff line number | Diff line change
@@ -3958,8 +3958,16 @@ def read(self, columns=None, **kwargs):
39583958
columns.insert(0, n)
39593959
df = super(AppendableMultiFrameTable, self).read(
39603960
columns=columns, **kwargs)
3961-
df = df.set_index(self.levels)
3962-
3961+
try:
3962+
df = df.set_index(self.levels)
3963+
except KeyError:
3964+
if kwargs.get('where') is not None and 'columns' in kwargs.get('where').expr:
3965+
raise KeyError(
3966+
"Indexes columns were not retrieved because you passed "
3967+
"a `where` argument containing columns specification. "
3968+
"(see http://github.com/pydata/pandas/issues/6169), try passing "
3969+
"the columns specification through the `columns` keyword instead"
3970+
)
39633971
# remove names for 'level_%d'
39643972
df.index = df.index.set_names([
39653973
None if self._re_levels.search(l) else l for l in df.index.names

pandas/io/tests/test_pytables.py

+27
Original file line number | Diff line number | Diff line change
@@ -1673,6 +1673,33 @@ def make_index(names=None):
16731673
store.append('df',df)
16741674
tm.assert_frame_equal(store.select('df'),df)
16751675

1676+
def test_select_columns_in_where(self):
1677+
1678+
# GH 6169
1679+
# recreate multi-indexes when columns is passed
1680+
# in the `where` argument
1681+
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
1682+
['one', 'two', 'three']],
1683+
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
1684+
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
1685+
names=['foo_name', 'bar_name'])
1686+
1687+
# With a DataFrame
1688+
df = DataFrame(np.random.randn(10, 3), index=index,
1689+
columns=['A', 'B', 'C'])
1690+
1691+
with ensure_clean_store(self.path) as store:
1692+
store.put('df', df, format='table')
1693+
tm.assert_frame_equal(store.select('df', where="columns=['A']"),df['A'],
1694+
check_index_type=True,check_column_type=True)
1695+
# With a Serie
1696+
s = Series(np.random.randn(10), index=index,
1697+
name='A')
1698+
with ensure_clean_store(self.path) as store:
1699+
store.put('s', s)
1700+
tm.assert_frame_equal(store.select('s', where="columns=['A']"),s,
1701+
check_index_type=True,check_column_type=True)
1702+
16761703
def test_pass_spec_to_storer(self):
16771704

16781705
df = tm.makeDataFrame()

0 commit comments

Comments
 (0)