Skip to content

BUG: correctly select on a multi-index even in the presence of an under-specified column spec (GH6169) #6202

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 31, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ Bug Fixes
- Bug in ``HDFStore`` on appending a dataframe with multi-indexed columns to
an existing table (:issue:`6167`)
- Consistency with dtypes in setting an empty DataFrame (:issue:`6171`)
- Bug in selecting on a multi-index ``HDFStore`` even in the presence of an
under-specified column spec (:issue:`6169`)

pandas 0.13.0
-------------
Expand Down
23 changes: 15 additions & 8 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3289,6 +3289,12 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
def process_axes(self, obj, columns=None):
""" process axes filters """

# make sure to include levels if we have them
if columns is not None and self.is_multi_index:
for n in self.levels:
if n not in columns:
columns.insert(0, n)

# reorder by any non_index_axes & limit to the select columns
for axis, labels in self.non_index_axes:
obj = _reindex_axis(obj, axis, labels, columns)
Expand All @@ -3305,6 +3311,12 @@ def process_filter(field, filt):

# see if the field is the name of an axis
if field == axis_name:

# if we have a multi-index, then need to include
# the levels
if self.is_multi_index:
filt = filt + Index(self.levels)

takers = op(axis_values, filt)
return obj.ix._getitem_axis(takers,
axis=axis_number)
Expand Down Expand Up @@ -3951,13 +3963,9 @@ def write(self, obj, data_columns=None, **kwargs):
return super(AppendableMultiFrameTable, self).write(
obj=obj, data_columns=data_columns, **kwargs)

def read(self, columns=None, **kwargs):
if columns is not None:
for n in self.levels:
if n not in columns:
columns.insert(0, n)
df = super(AppendableMultiFrameTable, self).read(
columns=columns, **kwargs)
def read(self, **kwargs):

df = super(AppendableMultiFrameTable, self).read(**kwargs)
df = df.set_index(self.levels)

# remove names for 'level_%d'
Expand All @@ -3967,7 +3975,6 @@ def read(self, columns=None, **kwargs):

return df


class AppendablePanelTable(AppendableTable):

""" suppor the new appendable table formats """
Expand Down
30 changes: 30 additions & 0 deletions pandas/io/tests/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -1673,6 +1673,36 @@ def make_index(names=None):
store.append('df',df)
tm.assert_frame_equal(store.select('df'),df)

def test_select_columns_in_where(self):

    # GH 6169
    # When a column subset is requested (through the ``columns`` keyword
    # or through a ``columns=[...]`` term in ``where``), the stored
    # multi-index must still be rebuilt on the selected result.
    level_values = [['foo', 'bar', 'baz', 'qux'],
                    ['one', 'two', 'three']]
    level_labels = [[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                    [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]]
    multi = MultiIndex(levels=level_values,
                       labels=level_labels,
                       names=['foo_name', 'bar_name'])

    # DataFrame case: both ways of asking for column 'A' must give
    # the same single-column frame carrying the original multi-index.
    frame = DataFrame(np.random.randn(10, 3), index=multi,
                      columns=['A', 'B', 'C'])

    with ensure_clean_store(self.path) as store:
        store.put('df', frame, format='table')
        only_a = frame[['A']]

        via_keyword = store.select('df', columns=['A'])
        tm.assert_frame_equal(via_keyword, only_a)

        via_where = store.select('df', where="columns=['A']")
        tm.assert_frame_equal(via_where, only_a)

    # Series case: selecting the series by its name through ``where``
    # must round-trip the series unchanged.
    series = Series(np.random.randn(10), index=multi,
                    name='A')
    with ensure_clean_store(self.path) as store:
        store.put('s', series, format='table')
        selected = store.select('s', where="columns=['A']")
        tm.assert_series_equal(selected, series)

def test_pass_spec_to_storer(self):

df = tm.makeDataFrame()
Expand Down