From e8de37369362225c524b87a98935389d5c9599c7 Mon Sep 17 00:00:00 2001 From: jreback Date: Sat, 31 Aug 2013 15:49:35 -0400 Subject: [PATCH] API/BUG: a column multi-index will be recreated properly (GH4710) API: raise on trying to use a multi-index with data_columns on the same axis --- doc/source/release.rst | 2 ++ pandas/io/pytables.py | 37 ++++++++++++++++++++++++++------ pandas/io/tests/test_pytables.py | 26 ++++++++++++++++++++++ 3 files changed, 58 insertions(+), 7 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 15c2895f19cc3..570300b7c79de 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -113,6 +113,8 @@ pandas 0.13 via the option ``io.hdf.dropna_table`` (:issue:`4625`) - the ``format`` keyword now replaces the ``table`` keyword; allowed values are ``fixed(f)|table(t)`` the ``Storer`` format has been renamed to ``Fixed`` + - a column multi-index will be recreated properly (:issue:`4710`); raise on trying to use a multi-index + with data_columns on the same axis - ``JSON`` - added ``date_unit`` parameter to specify resolution of timestamps. 
Options diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index aa1c20d582b5b..600f886c57c65 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -15,6 +15,7 @@ import warnings import numpy as np +import pandas from pandas import (Series, TimeSeries, DataFrame, Panel, Panel4D, Index, MultiIndex, Int64Index, Timestamp) from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel @@ -1379,11 +1380,7 @@ def update_info(self, info): for key in self._info_fields: value = getattr(self, key, None) - - try: - idx = info[self.name] - except: - idx = info[self.name] = dict() + idx = _get_info(info, self.name) existing_value = idx.get(key) if key in idx and value is not None and existing_value != value: @@ -2783,7 +2780,10 @@ def validate_data_columns(self, data_columns, min_itemsize): if not len(self.non_index_axes): return [] - axis_labels = self.non_index_axes[0][1] + axis, axis_labels = self.non_index_axes[0] + info = self.info.get(axis,dict()) + if info.get('type') == 'MultiIndex' and data_columns is not None: + raise ValueError("cannot use a multi-index on axis [{0}] with data_columns".format(axis)) # evaluate the passed data_columns, True == use all columns # take only valide axis labels @@ -2879,6 +2879,11 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, if sorted(append_axis) == sorted(exist_axis): append_axis = exist_axis + # the non_index_axes info + info = _get_info(self.info,i) + info['names'] = list(a.names) + info['type'] = a.__class__.__name__ + self.non_index_axes.append((i, append_axis)) # set axis positions (based on the axes) @@ -3459,10 +3464,20 @@ def read(self, where=None, columns=None, **kwargs): if not self.read_axes(where=where, **kwargs): return None + info = self.info.get(self.non_index_axes[0][0],dict()) if len(self.non_index_axes) else dict() index = self.index_axes[0].values frames = [] for a in self.values_axes: - cols = Index(a.values) + + # we could have a multi-index 
constructor here + # _ensure_index doesn't recognize our list-of-tuples here + if info.get('type') == 'MultiIndex': + cols = MultiIndex.from_tuples(a.values) + else: + cols = Index(a.values) + names = info.get('names') + if names is not None: + cols.set_names(names,inplace=True) if self.is_transposed: values = a.cvalues @@ -3657,6 +3672,14 @@ class AppendableNDimTable(AppendablePanelTable): obj_type = Panel4D +def _get_info(info, name): + """ get/create the info for this name """ + try: + idx = info[name] + except: + idx = info[name] = dict() + return idx + def _convert_index(index, encoding=None): index_name = getattr(index, 'name', None) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index e2abae83a099a..66f3d3766ee3e 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1448,6 +1448,32 @@ def test_append_hierarchical(self): expected = df.reindex(columns=['A','B']) tm.assert_frame_equal(result,expected) + def test_column_multiindex(self): + # GH 4710 + # recreate multi-indexes properly + + index = MultiIndex.from_tuples([('A','a'), ('A','b'), ('B','a'), ('B','b')], names=['first','second']) + df = DataFrame(np.arange(12).reshape(3,4), columns=index) + + with ensure_clean(self.path) as store: + + store.put('df',df) + tm.assert_frame_equal(store['df'],df,check_index_type=True,check_column_type=True) + + store.put('df1',df,format='table') + tm.assert_frame_equal(store['df1'],df,check_index_type=True,check_column_type=True) + + self.assertRaises(ValueError, store.put, 'df2',df,format='table',data_columns=['A']) + self.assertRaises(ValueError, store.put, 'df3',df,format='table',data_columns=True) + + # non_index_axes name + df = DataFrame(np.arange(12).reshape(3,4), columns=Index(list('ABCD'),name='foo')) + + with ensure_clean(self.path) as store: + + store.put('df1',df,format='table') + tm.assert_frame_equal(store['df1'],df,check_index_type=True,check_column_type=True) + def 
test_pass_spec_to_storer(self): df = tm.makeDataFrame()