Skip to content

BUG: (GH3748) Incorrectly read a HDFStore multi-index Frame with a column specification #3749

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 3, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ pandas 0.11.1

- When removing an object from a ``HDFStore``, ``remove(key)`` raises
``KeyError`` if the key is not a valid store object.
- In an ``HDFStore``, raise a ``TypeError`` on passing ``where`` or ``columns``
to select with a Storer; these are invalid parameters at this time
- The repr() for (Multi)Index now obeys display.max_seq_items rather
than numpy threshold print options. (GH3426_, GH3466_)
- Added mangle_dupe_cols option to read_table/csv, allowing users
Expand Down Expand Up @@ -197,6 +199,7 @@ pandas 0.11.1
their first argument (GH3702_)
- Fix file tokenization error with \r delimiter and quoted fields (GH3453_)
- Groupby transform with item-by-item not upcasting correctly (GH3740_)
- Incorrectly read a HDFStore multi-index Frame with a column specification (GH3748_)

.. _GH3164: https://github.com/pydata/pandas/issues/3164
.. _GH2786: https://github.com/pydata/pandas/issues/2786
Expand Down Expand Up @@ -280,6 +283,7 @@ pandas 0.11.1
.. _GH3667: https://github.com/pydata/pandas/issues/3667
.. _GH3733: https://github.com/pydata/pandas/issues/3733
.. _GH3740: https://github.com/pydata/pandas/issues/3740
.. _GH3748: https://github.com/pydata/pandas/issues/3748

pandas 0.11.0
=============
Expand Down
22 changes: 20 additions & 2 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -1664,6 +1664,12 @@ def f(values, freq=None, tz=None):
return f
return klass

def validate_read(self, kwargs):
    """Reject selection keywords that are not accepted on read.

    Parameters
    ----------
    kwargs : dict
        Keyword arguments that were handed to ``read``.

    Raises
    ------
    TypeError
        If ``columns`` or ``where`` is present with a value other than
        ``None``.  ``columns`` is examined first, so it wins when both
        are supplied.
    """
    # (keyword, error message) pairs, in the order they must be checked
    rejected = (
        ('columns',
         "cannot pass a column specification when reading a Storer"),
        ('where',
         "cannot pass a where specification when reading a Storer"),
    )
    for keyword, message in rejected:
        if kwargs.get(keyword) is not None:
            raise TypeError(message)

@property
def is_exists(self):
    """Existence flag; this implementation unconditionally reports True."""
    return True
Expand Down Expand Up @@ -1921,13 +1927,15 @@ def read_index_legacy(self, key):
class LegacySeriesStorer(LegacyStorer):
    """Storer that rebuilds a Series via the legacy index reader."""

    def read(self, **kwargs):
        """Return the stored object as a Series.

        The keyword arguments are only passed to ``validate_read``; they
        do not otherwise influence what is read.  The index is read from
        the 'index' node with ``read_index_legacy`` and the data from the
        'values' node with ``read_array``.
        """
        self.validate_read(kwargs)
        # read the index before the values, as the two are separate nodes
        legacy_index = self.read_index_legacy('index')
        return Series(self.read_array('values'), index=legacy_index)

class LegacyFrameStorer(LegacyStorer):

def read(self, **kwargs):
self.validate_read(kwargs)
index = self.read_index_legacy('index')
columns = self.read_index_legacy('columns')
values = self.read_array('values')
Expand All @@ -1945,6 +1953,7 @@ def shape(self):
return None

def read(self, **kwargs):
self.validate_read(kwargs)
index = self.read_index('index')
if len(index) > 0:
values = self.read_array('values')
Expand All @@ -1963,6 +1972,7 @@ class SparseSeriesStorer(GenericStorer):
attributes = ['name','fill_value','kind']

def read(self, **kwargs):
self.validate_read(kwargs)
index = self.read_index('index')
sp_values = self.read_array('sp_values')
sp_index = self.read_index('sp_index')
Expand All @@ -1983,6 +1993,7 @@ class SparseFrameStorer(GenericStorer):
attributes = ['default_kind','default_fill_value']

def read(self, **kwargs):
self.validate_read(kwargs)
columns = self.read_index('columns')
sdict = {}
for c in columns:
Expand Down Expand Up @@ -2013,6 +2024,7 @@ class SparsePanelStorer(GenericStorer):
attributes = ['default_kind','default_fill_value']

def read(self, **kwargs):
self.validate_read(kwargs)
items = self.read_index('items')

sdict = {}
Expand Down Expand Up @@ -2075,6 +2087,8 @@ def shape(self):
return None

def read(self, **kwargs):
self.validate_read(kwargs)

axes = []
for i in xrange(self.ndim):
ax = self.read_index('axis%d' % i)
Expand Down Expand Up @@ -3124,8 +3138,12 @@ def write(self, obj, data_columns=None, **kwargs):
self.levels = obj.index.names
return super(AppendableMultiFrameTable, self).write(obj=obj.reset_index(), data_columns=data_columns, **kwargs)

def read(self, *args, **kwargs):
df = super(AppendableMultiFrameTable, self).read(*args, **kwargs)
def read(self, columns=None, **kwargs):
    """Read the table and restore the index levels as the frame's index.

    Parameters
    ----------
    columns : sequence or None, default None
        Columns to select.  When given, any name in ``self.levels`` that
        is missing from the selection is added, because the levels are
        needed to rebuild the index after reading.  The caller's object
        is never modified.
    **kwargs
        Forwarded unchanged to the parent ``read``.

    Returns
    -------
    The frame returned by the parent ``read``, re-indexed in place on
    ``self.levels``.
    """
    if columns is not None:
        # Work on a private copy: inserting into the caller's list would
        # leak the level names back to them, and a tuple (or any other
        # non-list sequence) has no ``insert`` at all.
        columns = list(columns)
        for level in self.levels:
            if level not in columns:
                columns.insert(0, level)
    df = super(AppendableMultiFrameTable, self).read(columns=columns, **kwargs)
    df.set_index(self.levels, inplace=True)
    return df

Expand Down
20 changes: 20 additions & 0 deletions pandas/io/tests/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -1072,6 +1072,26 @@ def test_append_hierarchical(self):
result = store.select('mi')
tm.assert_frame_equal(result, df)

# GH 3748
result = store.select('mi',columns=['A','B'])
expected = df.reindex(columns=['A','B'])
tm.assert_frame_equal(result,expected)

with tm.ensure_clean('test.hdf') as path:
df.to_hdf(path,'df',table=True)
result = read_hdf(path,'df',columns=['A','B'])
expected = df.reindex(columns=['A','B'])
tm.assert_frame_equal(result,expected)

def test_pass_spec_to_storer(self):
    """select() on a put() frame must raise TypeError for columns/where."""
    frame = tm.makeDataFrame()

    # each entry is one set of selection keywords that must be rejected
    bad_specs = (
        {'columns': ['A']},
        {'where': ['columns=A']},
    )

    with ensure_clean(self.path) as store:
        store.put('df', frame)
        for spec in bad_specs:
            self.assertRaises(TypeError, store.select, 'df', **spec)

def test_append_misc(self):

with ensure_clean(self.path) as store:
Expand Down