Skip to content

Commit e8de373

Browse files
committed
API/BUG: a column multi-index will be recreated properly (GH4710)
API: raise on trying to use a multi-index with data_columns on the same axis
1 parent 5148e90 commit e8de373

File tree

3 files changed

+58
-7
lines changed

3 files changed

+58
-7
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ pandas 0.13
113113
via the option ``io.hdf.dropna_table`` (:issue:`4625`)
114114
- the ``format`` keyword now replaces the ``table`` keyword; allowed values are ``fixed(f)|table(t)``
115115
the ``Storer`` format has been renamed to ``Fixed``
116+
- a column multi-index will be recreated properly (:issue:`4710`); raise on trying to use a multi-index
117+
with data_columns on the same axis
116118
- ``JSON``
117119

118120
- added ``date_unit`` parameter to specify resolution of timestamps. Options

pandas/io/pytables.py

+30-7
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import warnings
1616

1717
import numpy as np
18+
import pandas
1819
from pandas import (Series, TimeSeries, DataFrame, Panel, Panel4D, Index,
1920
MultiIndex, Int64Index, Timestamp)
2021
from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel
@@ -1379,11 +1380,7 @@ def update_info(self, info):
13791380
for key in self._info_fields:
13801381

13811382
value = getattr(self, key, None)
1382-
1383-
try:
1384-
idx = info[self.name]
1385-
except:
1386-
idx = info[self.name] = dict()
1383+
idx = _get_info(info, self.name)
13871384

13881385
existing_value = idx.get(key)
13891386
if key in idx and value is not None and existing_value != value:
@@ -2783,7 +2780,10 @@ def validate_data_columns(self, data_columns, min_itemsize):
27832780
if not len(self.non_index_axes):
27842781
return []
27852782

2786-
axis_labels = self.non_index_axes[0][1]
2783+
axis, axis_labels = self.non_index_axes[0]
2784+
info = self.info.get(axis,dict())
2785+
if info.get('type') == 'MultiIndex' and data_columns is not None:
2786+
raise ValueError("cannot use a multi-index on axis [{0}] with data_columns".format(axis))
27872787

27882788
# evaluate the passed data_columns, True == use all columns
27892789
# take only valid axis labels
@@ -2879,6 +2879,11 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
28792879
if sorted(append_axis) == sorted(exist_axis):
28802880
append_axis = exist_axis
28812881

2882+
# the non_index_axes info
2883+
info = _get_info(self.info,i)
2884+
info['names'] = list(a.names)
2885+
info['type'] = a.__class__.__name__
2886+
28822887
self.non_index_axes.append((i, append_axis))
28832888

28842889
# set axis positions (based on the axes)
@@ -3459,10 +3464,20 @@ def read(self, where=None, columns=None, **kwargs):
34593464
if not self.read_axes(where=where, **kwargs):
34603465
return None
34613466

3467+
info = self.info.get(self.non_index_axes[0][0],dict()) if len(self.non_index_axes) else dict()
34623468
index = self.index_axes[0].values
34633469
frames = []
34643470
for a in self.values_axes:
3465-
cols = Index(a.values)
3471+
3472+
# we could have a multi-index constructor here
3473+
# _ensure_index doesn't recognize our list-of-tuples here
3474+
if info.get('type') == 'MultiIndex':
3475+
cols = MultiIndex.from_tuples(a.values)
3476+
else:
3477+
cols = Index(a.values)
3478+
names = info.get('names')
3479+
if names is not None:
3480+
cols.set_names(names,inplace=True)
34663481

34673482
if self.is_transposed:
34683483
values = a.cvalues
@@ -3657,6 +3672,14 @@ class AppendableNDimTable(AppendablePanelTable):
36573672
obj_type = Panel4D
36583673

36593674

3675+
def _get_info(info, name):
3676+
""" get/create the info for this name """
3677+
try:
3678+
idx = info[name]
3679+
except:
3680+
idx = info[name] = dict()
3681+
return idx
3682+
36603683
def _convert_index(index, encoding=None):
36613684
index_name = getattr(index, 'name', None)
36623685

pandas/io/tests/test_pytables.py

+26
Original file line numberDiff line numberDiff line change
@@ -1448,6 +1448,32 @@ def test_append_hierarchical(self):
14481448
expected = df.reindex(columns=['A','B'])
14491449
tm.assert_frame_equal(result,expected)
14501450

1451+
def test_column_multiindex(self):
    """Round-trip frames with multi-index / named columns through a store."""
    # GH 4710
    # a column multi-index has to come back unchanged after a put/get

    tuples = [('A','a'), ('A','b'), ('B','a'), ('B','b')]
    columns = MultiIndex.from_tuples(tuples, names=['first','second'])
    frame = DataFrame(np.arange(12).reshape(3,4), columns=columns)

    with ensure_clean(self.path) as store:

        # stored with the default format
        store.put('df', frame)
        tm.assert_frame_equal(store['df'], frame,
                              check_index_type=True,
                              check_column_type=True)

        # stored as a table
        store.put('df1', frame, format='table')
        tm.assert_frame_equal(store['df1'], frame,
                              check_index_type=True,
                              check_column_type=True)

        # data_columns combined with a column multi-index must raise
        for key, data_columns in (('df2', ['A']), ('df3', True)):
            self.assertRaises(ValueError, store.put, key, frame,
                              format='table', data_columns=data_columns)

    # the name of a plain (non multi-index) columns axis is kept as well
    named_columns = Index(list('ABCD'), name='foo')
    frame = DataFrame(np.arange(12).reshape(3,4), columns=named_columns)

    with ensure_clean(self.path) as store:

        store.put('df1', frame, format='table')
        tm.assert_frame_equal(store['df1'], frame,
                              check_index_type=True,
                              check_column_type=True)
1476+
14511477
def test_pass_spec_to_storer(self):
14521478

14531479
df = tm.makeDataFrame()

0 commit comments

Comments
 (0)