Skip to content

API/BUG: a column multi-index will be recreated properly (GH4710) #4716

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 31, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ pandas 0.13
via the option ``io.hdf.dropna_table`` (:issue:`4625`)
- the ``format`` keyword now replaces the ``table`` keyword; allowed values are ``fixed(f)|table(t)``
the ``Storer`` format has been renamed to ``Fixed``
- a column multi-index will be recreated properly (:issue:`4710`); a ``ValueError`` is raised when trying
to use a multi-index with ``data_columns`` on the same axis
- ``JSON``

- added ``date_unit`` parameter to specify resolution of timestamps. Options
Expand Down
37 changes: 30 additions & 7 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import warnings

import numpy as np
import pandas
from pandas import (Series, TimeSeries, DataFrame, Panel, Panel4D, Index,
MultiIndex, Int64Index, Timestamp)
from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel
Expand Down Expand Up @@ -1379,11 +1380,7 @@ def update_info(self, info):
for key in self._info_fields:

value = getattr(self, key, None)

try:
idx = info[self.name]
except:
idx = info[self.name] = dict()
idx = _get_info(info, self.name)

existing_value = idx.get(key)
if key in idx and value is not None and existing_value != value:
Expand Down Expand Up @@ -2783,7 +2780,10 @@ def validate_data_columns(self, data_columns, min_itemsize):
if not len(self.non_index_axes):
return []

axis_labels = self.non_index_axes[0][1]
axis, axis_labels = self.non_index_axes[0]
info = self.info.get(axis,dict())
if info.get('type') == 'MultiIndex' and data_columns is not None:
raise ValueError("cannot use a multi-index on axis [{0}] with data_columns".format(axis))

# evaluate the passed data_columns, True == use all columns
# take only valid axis labels
Expand Down Expand Up @@ -2879,6 +2879,11 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
if sorted(append_axis) == sorted(exist_axis):
append_axis = exist_axis

# the non_index_axes info
info = _get_info(self.info,i)
info['names'] = list(a.names)
info['type'] = a.__class__.__name__

self.non_index_axes.append((i, append_axis))

# set axis positions (based on the axes)
Expand Down Expand Up @@ -3459,10 +3464,20 @@ def read(self, where=None, columns=None, **kwargs):
if not self.read_axes(where=where, **kwargs):
return None

info = self.info.get(self.non_index_axes[0][0],dict()) if len(self.non_index_axes) else dict()
index = self.index_axes[0].values
frames = []
for a in self.values_axes:
cols = Index(a.values)

# we could have a multi-index constructor here
# _ensure_index doesn't recognize our list-of-tuples here
if info.get('type') == 'MultiIndex':
cols = MultiIndex.from_tuples(a.values)
else:
cols = Index(a.values)
names = info.get('names')
if names is not None:
cols.set_names(names,inplace=True)

if self.is_transposed:
values = a.cvalues
Expand Down Expand Up @@ -3657,6 +3672,14 @@ class AppendableNDimTable(AppendablePanelTable):
obj_type = Panel4D


def _get_info(info, name):
""" get/create the info for this name """
try:
idx = info[name]
except:
idx = info[name] = dict()
return idx

def _convert_index(index, encoding=None):
index_name = getattr(index, 'name', None)

Expand Down
26 changes: 26 additions & 0 deletions pandas/io/tests/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -1448,6 +1448,32 @@ def test_append_hierarchical(self):
expected = df.reindex(columns=['A','B'])
tm.assert_frame_equal(result,expected)

def test_column_multiindex(self):
    """Check that column index metadata survives a store round trip (GH 4710).

    A frame with MultiIndex columns is written with the default format and
    with ``format='table'`` and must compare equal (including column type)
    when read back.  Passing ``data_columns`` together with MultiIndex
    columns must raise ``ValueError``.  Finally, a frame whose plain column
    Index has a name must round-trip through the table format.
    """
    tuples = [('A','a'), ('A','b'), ('B','a'), ('B','b')]
    columns = MultiIndex.from_tuples(tuples, names=['first','second'])
    df = DataFrame(np.arange(12).reshape(3,4), columns=columns)

    with ensure_clean(self.path) as store:

        # default format
        store.put('df', df)
        result = store['df']
        tm.assert_frame_equal(result, df,
                              check_index_type=True, check_column_type=True)

        # table format
        store.put('df1', df, format='table')
        result = store['df1']
        tm.assert_frame_equal(result, df,
                              check_index_type=True, check_column_type=True)

        # data_columns cannot be combined with multi-index columns
        for key, data_columns in (('df2', ['A']), ('df3', True)):
            self.assertRaises(ValueError, store.put, key, df,
                              format='table', data_columns=data_columns)

    # non_index_axes name
    named_columns = Index(list('ABCD'), name='foo')
    df = DataFrame(np.arange(12).reshape(3,4), columns=named_columns)

    with ensure_clean(self.path) as store:

        store.put('df1', df, format='table')
        result = store['df1']
        tm.assert_frame_equal(result, df,
                              check_index_type=True, check_column_type=True)

def test_pass_spec_to_storer(self):

df = tm.makeDataFrame()
Expand Down