Skip to content

Commit d461d41

Browse files
committed
ENH: HDFStore will retain index attributes (freq,tz,name) on recreation (GH3499_)
TST: added legacy_table_0.11 table and tests
1 parent d925966 commit d461d41

File tree

5 files changed

+113
-21
lines changed

5 files changed

+113
-21
lines changed

RELEASE.rst

+1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ pandas 0.11.1
3838
- Fixed various issues with internal pprinting code, the repr() for various objects
3939
including TimeStamp and *Index now produces valid python code strings and
4040
can be used to recreate the object, (GH3038_), (GH3379_), (GH3251_)
41+
- ``HDFStore`` will retain index attributes (freq,tz,name) on recreation (GH3499_)
4142
4243
**API Changes**
4344

pandas/core/index.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -83,12 +83,12 @@ class Index(np.ndarray):
8383

8484
_engine_type = _index.ObjectEngine
8585

86-
def __new__(cls, data, dtype=None, copy=False, name=None):
86+
def __new__(cls, data, dtype=None, copy=False, name=None, **kwargs):
8787
from pandas.tseries.period import PeriodIndex
8888
if isinstance(data, np.ndarray):
8989
if issubclass(data.dtype.type, np.datetime64):
9090
from pandas.tseries.index import DatetimeIndex
91-
result = DatetimeIndex(data, copy=copy, name=name)
91+
result = DatetimeIndex(data, copy=copy, name=name, **kwargs)
9292
if dtype is not None and _o_dtype == dtype:
9393
return Index(result.to_pydatetime(), dtype=_o_dtype)
9494
else:
@@ -102,7 +102,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None):
102102
except TypeError:
103103
pass
104104
elif isinstance(data, PeriodIndex):
105-
return PeriodIndex(data, copy=copy, name=name)
105+
return PeriodIndex(data, copy=copy, name=name, **kwargs)
106106

107107
if issubclass(data.dtype.type, np.integer):
108108
return Int64Index(data, copy=copy, dtype=dtype, name=name)
@@ -123,10 +123,10 @@ def __new__(cls, data, dtype=None, copy=False, name=None):
123123
if (inferred.startswith('datetime') or
124124
tslib.is_timestamp_array(subarr)):
125125
from pandas.tseries.index import DatetimeIndex
126-
return DatetimeIndex(subarr, copy=copy, name=name)
126+
return DatetimeIndex(subarr, copy=copy, name=name, **kwargs)
127127

128128
elif inferred == 'period':
129-
return PeriodIndex(subarr, name=name)
129+
return PeriodIndex(subarr, name=name, **kwargs)
130130

131131
subarr = subarr.view(cls)
132132
subarr.name = name

pandas/io/pytables.py

+68-12
Original file line numberDiff line numberDiff line change
@@ -943,7 +943,8 @@ class IndexCol(object):
943943
is_searchable = False
944944

945945
def __init__(self, values=None, kind=None, typ=None, cname=None, itemsize=None,
946-
name=None, axis=None, kind_attr=None, pos=None, **kwargs):
946+
name=None, axis=None, kind_attr=None, pos=None, freq=None, tz=None,
947+
index_name=None, **kwargs):
947948
self.values = values
948949
self.kind = kind
949950
self.typ = typ
@@ -953,6 +954,9 @@ def __init__(self, values=None, kind=None, typ=None, cname=None, itemsize=None,
953954
self.kind_attr = kind_attr
954955
self.axis = axis
955956
self.pos = pos
957+
self.freq = freq
958+
self.tz = tz
959+
self.index_name = None
956960
self.table = None
957961

958962
if name is not None:
@@ -1023,7 +1027,22 @@ def convert(self, values, nan_rep):
10231027
values = values[self.cname]
10241028
except:
10251029
pass
1026-
self.values = Index(_maybe_convert(values, self.kind))
1030+
1031+
kwargs = dict()
1032+
if self.freq is not None:
1033+
kwargs['freq'] = self.freq
1034+
if self.tz is not None:
1035+
kwargs['tz'] = self.tz
1036+
if self.name is not None:
1037+
kwargs['name'] = self.index_name
1038+
try:
1039+
self.values = Index(_maybe_convert(values, self.kind), **kwargs)
1040+
except:
1041+
1042+
# if the output freq is different than what we recorded, then infer it
1043+
if 'freq' in kwargs:
1044+
kwargs['freq'] = 'infer'
1045+
self.values = Index(_maybe_convert(values, self.kind), **kwargs)
10271046
return self
10281047

10291048
def take_data(self):
@@ -1098,6 +1117,30 @@ def validate_attr(self, append):
10981117
raise TypeError("incompatible kind in col [%s - %s]" %
10991118
(existing_kind, self.kind))
11001119

1120+
def update_info(self, info):
1121+
""" set/update the info for this indexable with the key/value
1122+
raises ValueError if an existing value conflicts with the new value """
1123+
1124+
for key in ['freq','tz','name']:
1125+
1126+
value = getattr(self,key,None)
1127+
1128+
try:
1129+
idx = info[self.name]
1130+
except:
1131+
idx = info[self.name] = dict()
1132+
1133+
existing_value = idx.get(key)
1134+
if key in idx and existing_value != value:
1135+
raise ValueError("invalid info for [%s] for [%s]"""
1136+
", existing_value [%s] conflicts with new value [%s]" % (self.name,
1137+
key,existing_value,value))
1138+
1139+
if value is not None or existing_value is not None:
1140+
idx[key] = value
1141+
1142+
return self
1143+
11011144
def get_attr(self):
11021145
""" set the kind for this column """
11031146
self.kind = getattr(self.attrs, self.kind_attr, None)
@@ -2060,6 +2103,7 @@ def __init__(self, *args, **kwargs):
20602103
self.non_index_axes = []
20612104
self.values_axes = []
20622105
self.data_columns = []
2106+
self.info = dict()
20632107
self.nan_rep = None
20642108
self.selection = None
20652109

@@ -2173,18 +2217,20 @@ def values_cols(self):
21732217

21742218
def set_attrs(self):
21752219
""" set our table type & indexables """
2176-
self.attrs.table_type = self.table_type
2177-
self.attrs.index_cols = self.index_cols()
2178-
self.attrs.values_cols = self.values_cols()
2220+
self.attrs.table_type = self.table_type
2221+
self.attrs.index_cols = self.index_cols()
2222+
self.attrs.values_cols = self.values_cols()
21792223
self.attrs.non_index_axes = self.non_index_axes
21802224
self.attrs.data_columns = self.data_columns
2181-
self.attrs.nan_rep = self.nan_rep
2182-
self.attrs.levels = self.levels
2225+
self.attrs.info = self.info
2226+
self.attrs.nan_rep = self.nan_rep
2227+
self.attrs.levels = self.levels
21832228

21842229
def get_attrs(self):
21852230
""" retrieve our attributes """
21862231
self.non_index_axes = getattr(self.attrs,'non_index_axes',None) or []
21872232
self.data_columns = getattr(self.attrs,'data_columns',None) or []
2233+
self.info = getattr(self.attrs,'info',None) or dict()
21882234
self.nan_rep = getattr(self.attrs,'nan_rep',None)
21892235
self.levels = getattr(self.attrs,'levels',None) or []
21902236
t = self.table
@@ -2221,8 +2267,17 @@ def indexables(self):
22212267
d = self.description
22222268
self._indexables = []
22232269

2270+
# info
2271+
info = getattr(self.attrs,'info',None) or dict()
2272+
22242273
# index columns
2225-
self._indexables.extend([IndexCol(name=name, axis=axis, pos=i) for i, (axis, name) in enumerate(self.attrs.index_cols)])
2274+
def create_index(i, axis, name):
2275+
kwargs = dict( name=name, axis=axis, pos=i )
2276+
i = info.get(name)
2277+
if i is not None and len(i):
2278+
kwargs.update(i)
2279+
return IndexCol(**kwargs)
2280+
self._indexables.extend([ create_index(i,axis,name) for i, (axis, name) in enumerate(self.attrs.index_cols)])
22262281

22272282
# values columns
22282283
dc = set(self.data_columns)
@@ -2379,7 +2434,8 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
23792434
existing_table.infer_axes()
23802435
axes = [ a.axis for a in existing_table.index_axes]
23812436
data_columns = existing_table.data_columns
2382-
nan_rep = existing_table.nan_rep
2437+
nan_rep = existing_table.nan_rep
2438+
self.info = existing_table.info
23832439
else:
23842440
existing_table = None
23852441

@@ -2421,7 +2477,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
24212477
self.non_index_axes.append((i, append_axis))
24222478

24232479
# set axis positions (based on the axes)
2424-
self.index_axes = [index_axes_map[a].set_pos(j) for j,
2480+
self.index_axes = [index_axes_map[a].set_pos(j).update_info(self.info) for j,
24252481
a in enumerate(axes)]
24262482
j = len(self.index_axes)
24272483

@@ -3042,10 +3098,10 @@ class AppendableNDimTable(AppendablePanelTable):
30423098
def _convert_index(index):
30433099
if isinstance(index, DatetimeIndex):
30443100
converted = index.asi8
3045-
return IndexCol(converted, 'datetime64', _tables().Int64Col())
3101+
return IndexCol(converted, 'datetime64', _tables().Int64Col(), freq=getattr(index,'freq',None), tz=getattr(index,'tz',None))
30463102
elif isinstance(index, (Int64Index, PeriodIndex)):
30473103
atom = _tables().Int64Col()
3048-
return IndexCol(index.values, 'integer', atom)
3104+
return IndexCol(index.values, 'integer', atom, freq=getattr(index,'freq',None))
30493105

30503106
if isinstance(index, MultiIndex):
30513107
raise Exception('MultiIndex not supported here!')
Binary file not shown.

pandas/io/tests/test_pytables.py

+39-4
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import datetime
88
import numpy as np
99

10+
import pandas
1011
from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range,
1112
date_range, Index)
1213
from pandas.io.pytables import HDFStore, get_store, Term, IncompatibilityWarning, PerformanceWarning
@@ -2041,6 +2042,22 @@ def test_select_iterator(self):
20412042
result = concat(results)
20422043
tm.assert_frame_equal(expected, result)
20432044

2045+
def test_retain_index_attributes(self):
2046+
2047+
# GH 3499, losing frequency info on index recreation
2048+
df = DataFrame(dict(A = Series(xrange(3),
2049+
index=date_range('2000-1-1',periods=3,freq='H'))))
2050+
2051+
with ensure_clean(self.path) as store:
2052+
store.put('data', df, table=True)
2053+
2054+
result = store.get('data')
2055+
tm.assert_frame_equal(df,result)
2056+
2057+
for attr in ['freq','tz']:
2058+
for idx in ['index','columns']:
2059+
self.assert_(getattr(getattr(df,idx),attr,None) == getattr(getattr(result,idx),attr,None))
2060+
20442061
def test_panel_select(self):
20452062

20462063
wp = tm.makePanel()
@@ -2437,6 +2454,16 @@ def test_legacy_0_10_read(self):
24372454
finally:
24382455
safe_close(store)
24392456

2457+
def test_legacy_0_11_read(self):
2458+
# legacy from 0.11
2459+
try:
2460+
store = HDFStore(tm.get_data_path('legacy_hdf/legacy_table_0.11.h5'), 'r')
2461+
df = store.select('df')
2462+
df1 = store.select('df1')
2463+
mi = store.select('mi')
2464+
finally:
2465+
safe_close(store)
2466+
24402467
def test_copy(self):
24412468

24422469
def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs):
@@ -2497,14 +2524,22 @@ def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs):
24972524
def test_legacy_table_write(self):
24982525
raise nose.SkipTest
24992526

2500-
# legacy table types
2527+
store = HDFStore(tm.get_data_path('legacy_hdf/legacy_table_%s.h5' % pandas.__version__), 'a')
2528+
25012529
df = tm.makeDataFrame()
25022530
wp = tm.makePanel()
25032531

2504-
store = HDFStore(tm.get_data_path('legacy_hdf/legacy_table.h5'), 'a')
2532+
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
2533+
['one', 'two', 'three']],
2534+
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
2535+
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
2536+
names=['foo', 'bar'])
2537+
df = DataFrame(np.random.randn(10, 3), index=index,
2538+
columns=['A', 'B', 'C'])
2539+
store.append('mi', df)
25052540

2506-
self.assertRaises(Exception, store.append, 'df1', df)
2507-
self.assertRaises(Exception, store.append, 'wp1', wp)
2541+
df = DataFrame(dict(A = 'foo', B = 'bar'),index=range(10))
2542+
store.append('df', df, data_columns = ['B'], min_itemsize={'A' : 200 })
25082543

25092544
store.close()
25102545

0 commit comments

Comments
 (0)