Skip to content

BUG/TST: fixed up retaining of index names in the table .info (like freq/tz) #3549

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 9, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@ pandas 0.11.1
- ``HDFStore``

- will retain index attributes (freq,tz,name) on recreation (GH3499_)
- will warn with a FrequencyWarning if you are attempting to append
an index with a different frequency than the existing
- will warn with an AttributeConflictWarning if you are attempting to append
an index with a different frequency than the existing index, or attempting
to append an index with a different name than the existing index
- support datelike columns with a timezone as data_columns (GH2852_)
- table writing performance improvements.

Expand Down
5 changes: 3 additions & 2 deletions doc/source/v0.11.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ Enhancements
- ``HDFStore``

- will retain index attributes (freq,tz,name) on recreation (GH3499_)
- will warn with a FrequencyWarning if you are attempting to append
an index with a different frequency than the existing
- will warn with an AttributeConflictWarning if you are attempting to append
an index with a different frequency than the existing index, or attempting
to append an index with a different name than the existing index
- support datelike columns with a timezone as data_columns (GH2852_)

See the `full release notes
Expand Down
82 changes: 52 additions & 30 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ class IncompatibilityWarning(Warning): pass
where criteria is being ignored as this version [%s] is too old (or not-defined),
read the file in and write it out to a new file to upgrade (with the copy_to method)
"""
class FrequencyWarning(Warning): pass
frequency_doc = """
the frequency of the existing index is [%s] which conflicts with the new freq [%s],
resetting the frequency to None
class AttributeConflictWarning(Warning): pass
attribute_conflict_doc = """
the [%s] attribute of the existing index is [%s] which conflicts with the new [%s],
resetting the attribute to None
"""
class PerformanceWarning(Warning): pass
performance_doc = """
Expand Down Expand Up @@ -873,9 +873,9 @@ def _write_to_group(self, key, value, index=True, table=False, append=False, com
if not s.is_table or (s.is_table and table is None and s.is_exists):
raise ValueError('Can only append to Tables')
if not s.is_exists:
s.set_info()
s.set_object_info()
else:
s.set_info()
s.set_object_info()

if not s.is_table and complib:
raise ValueError('Compression not supported on non-table')
Expand Down Expand Up @@ -949,7 +949,7 @@ class IndexCol(object):
is_an_indexable = True
is_data_indexable = True
is_searchable = False
_info_fields = ['freq','tz','name']
_info_fields = ['freq','tz','index_name']

def __init__(self, values=None, kind=None, typ=None, cname=None, itemsize=None,
name=None, axis=None, kind_attr=None, pos=None, freq=None, tz=None,
Expand All @@ -965,7 +965,7 @@ def __init__(self, values=None, kind=None, typ=None, cname=None, itemsize=None,
self.pos = pos
self.freq = freq
self.tz = tz
self.index_name = None
self.index_name = index_name
self.table = None

if name is not None:
Expand Down Expand Up @@ -1042,7 +1042,7 @@ def convert(self, values, nan_rep):
kwargs['freq'] = self.freq
if self.tz is not None:
kwargs['tz'] = self.tz
if self.name is not None:
if self.index_name is not None:
kwargs['name'] = self.index_name
try:
self.values = Index(_maybe_convert(values, self.kind), **kwargs)
Expand Down Expand Up @@ -1128,7 +1128,7 @@ def validate_attr(self, append):

def update_info(self, info):
""" set/update the info for this indexable with the key/value
if validate is True, then raise if an existing value does not match the value """
if there is a conflict raise/warn as needed """

for key in self._info_fields:

Expand All @@ -1140,15 +1140,16 @@ def update_info(self, info):
idx = info[self.name] = dict()

existing_value = idx.get(key)
if key in idx and existing_value != value:
if key in idx and value is not None and existing_value != value:

# frequency just warn
if key == 'freq':
ws = frequency_doc % (existing_value,value)
warnings.warn(ws, FrequencyWarning)
# frequency/name just warn
if key in ['freq','index_name']:
ws = attribute_conflict_doc % (key,existing_value,value)
warnings.warn(ws, AttributeConflictWarning)

# reset
idx[key] = None
setattr(self,key,None)

else:
raise ValueError("invalid info for [%s] for [%s]"""
Expand Down Expand Up @@ -1554,7 +1555,7 @@ def __repr__(self):
def __str__(self):
return self.__repr__()

def set_info(self):
def set_object_info(self):
""" set my pandas type & version """
self.attrs.pandas_type = self.pandas_kind
self.attrs.pandas_version = _version
Expand Down Expand Up @@ -2275,16 +2276,20 @@ def values_cols(self):
""" return a list of my values cols """
return [i.cname for i in self.values_axes]

def set_info(self):
""" update our table index info """
self.attrs.info = self.info

def set_attrs(self):
""" set our table type & indexables """
self.attrs.table_type = self.table_type
self.attrs.index_cols = self.index_cols()
self.attrs.values_cols = self.values_cols()
self.attrs.non_index_axes = self.non_index_axes
self.attrs.data_columns = self.data_columns
self.attrs.info = self.info
self.attrs.nan_rep = self.nan_rep
self.attrs.levels = self.levels
self.set_info()

def get_attrs(self):
""" retrieve our attributes """
Expand Down Expand Up @@ -2487,7 +2492,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
axes = [ a.axis for a in existing_table.index_axes]
data_columns = existing_table.data_columns
nan_rep = existing_table.nan_rep
self.info = existing_table.info
self.info = copy.copy(existing_table.info)
else:
existing_table = None

Expand Down Expand Up @@ -2879,6 +2884,9 @@ def write(self, obj, axes=None, append=False, complib=None,
else:
table = self.table

# update my info
self.set_info()

# validate the axes and set the kinds
for a in self.axes:
a.validate_and_set(table, append)
Expand Down Expand Up @@ -3036,10 +3044,10 @@ def read(self, where=None, columns=None, **kwargs):
if self.is_transposed:
values = a.cvalues
index_ = cols
cols_ = Index(index)
cols_ = Index(index,name=getattr(index,'name',None))
else:
values = a.cvalues.T
index_ = Index(index)
index_ = Index(index,name=getattr(index,'name',None))
cols_ = cols

# if we have a DataIndexableCol, its shape will only be 1 dim
Expand Down Expand Up @@ -3157,12 +3165,17 @@ class AppendableNDimTable(AppendablePanelTable):
obj_type = Panel4D

def _convert_index(index):
index_name = getattr(index,'name',None)

if isinstance(index, DatetimeIndex):
converted = index.asi8
return IndexCol(converted, 'datetime64', _tables().Int64Col(), freq=getattr(index,'freq',None), tz=getattr(index,'tz',None))
return IndexCol(converted, 'datetime64', _tables().Int64Col(),
freq=getattr(index,'freq',None), tz=getattr(index,'tz',None),
index_name=index_name)
elif isinstance(index, (Int64Index, PeriodIndex)):
atom = _tables().Int64Col()
return IndexCol(index.values, 'integer', atom, freq=getattr(index,'freq',None))
return IndexCol(index.values, 'integer', atom, freq=getattr(index,'freq',None),
index_name=index_name)

if isinstance(index, MultiIndex):
raise Exception('MultiIndex not supported here!')
Expand All @@ -3173,36 +3186,45 @@ def _convert_index(index):

if inferred_type == 'datetime64':
converted = values.view('i8')
return IndexCol(converted, 'datetime64', _tables().Int64Col())
return IndexCol(converted, 'datetime64', _tables().Int64Col(),
freq=getattr(index,'freq',None), tz=getattr(index,'tz',None),
index_name=index_name)
elif inferred_type == 'datetime':
converted = np.array([(time.mktime(v.timetuple()) +
v.microsecond / 1E6) for v in values],
dtype=np.float64)
return IndexCol(converted, 'datetime', _tables().Time64Col())
return IndexCol(converted, 'datetime', _tables().Time64Col(),
index_name=index_name)
elif inferred_type == 'date':
converted = np.array([time.mktime(v.timetuple()) for v in values],
dtype=np.int32)
return IndexCol(converted, 'date', _tables().Time32Col())
return IndexCol(converted, 'date', _tables().Time32Col(),
index_name=index_name)
elif inferred_type == 'string':
# atom = _tables().ObjectAtom()
# return np.asarray(values, dtype='O'), 'object', atom

converted = np.array(list(values), dtype=np.str_)
itemsize = converted.dtype.itemsize
return IndexCol(converted, 'string', _tables().StringCol(itemsize), itemsize=itemsize)
return IndexCol(converted, 'string', _tables().StringCol(itemsize), itemsize=itemsize,
index_name=index_name)
elif inferred_type == 'unicode':
atom = _tables().ObjectAtom()
return IndexCol(np.asarray(values, dtype='O'), 'object', atom)
return IndexCol(np.asarray(values, dtype='O'), 'object', atom,
index_name=index_name)
elif inferred_type == 'integer':
# take a guess for now, hope the values fit
atom = _tables().Int64Col()
return IndexCol(np.asarray(values, dtype=np.int64), 'integer', atom)
return IndexCol(np.asarray(values, dtype=np.int64), 'integer', atom,
index_name=index_name)
elif inferred_type == 'floating':
atom = _tables().Float64Col()
return IndexCol(np.asarray(values, dtype=np.float64), 'float', atom)
return IndexCol(np.asarray(values, dtype=np.float64), 'float', atom,
index_name=index_name)
else: # pragma: no cover
atom = _tables().ObjectAtom()
return IndexCol(np.asarray(values, dtype='O'), 'object', atom)
return IndexCol(np.asarray(values, dtype='O'), 'object', atom,
index_name=index_name)

def _unconvert_index(data, kind):
if kind == 'datetime64':
Expand Down
31 changes: 22 additions & 9 deletions pandas/io/tests/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
import pandas
from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range,
date_range, Index)
from pandas.io.pytables import (HDFStore, get_store, Term,
from pandas.io.pytables import (HDFStore, get_store, Term, read_hdf,
IncompatibilityWarning, PerformanceWarning,
FrequencyWarning)
AttributeConflictWarning)
import pandas.util.testing as tm
from pandas.tests.test_series import assert_series_equal
from pandas.tests.test_frame import assert_frame_equal
Expand Down Expand Up @@ -109,8 +109,6 @@ def test_conv_read_write(self):

try:

from pandas import read_hdf

def roundtrip(key, obj,**kwargs):
obj.to_hdf(self.path, key,**kwargs)
return read_hdf(self.path, key)
Expand Down Expand Up @@ -2089,17 +2087,17 @@ def test_retain_index_attributes(self):
result = store.get('data')
tm.assert_frame_equal(df,result)

for attr in ['freq','tz']:
for attr in ['freq','tz','name']:
for idx in ['index','columns']:
self.assert_(getattr(getattr(df,idx),attr,None) == getattr(getattr(result,idx),attr,None))


# try to append a table with a different frequency
warnings.filterwarnings('ignore', category=FrequencyWarning)
warnings.filterwarnings('ignore', category=AttributeConflictWarning)
df2 = DataFrame(dict(A = Series(xrange(3),
index=date_range('2002-1-1',periods=3,freq='D'))))
store.append('data',df2)
warnings.filterwarnings('always', category=FrequencyWarning)
warnings.filterwarnings('always', category=AttributeConflictWarning)

self.assert_(store.get_storer('data').info['index']['freq'] is None)

Expand All @@ -2114,12 +2112,27 @@ def test_retain_index_attributes(self):
def test_retain_index_attributes2(self):

with tm.ensure_clean(self.path) as path:
warnings.filterwarnings('ignore', category=FrequencyWarning)

warnings.filterwarnings('ignore', category=AttributeConflictWarning)

df = DataFrame(dict(A = Series(xrange(3), index=date_range('2000-1-1',periods=3,freq='H'))))
df.to_hdf(path,'data',mode='w',append=True)
df2 = DataFrame(dict(A = Series(xrange(3), index=date_range('2002-1-1',periods=3,freq='D'))))
df2.to_hdf(path,'data',append=True)
warnings.filterwarnings('always', category=FrequencyWarning)

idx = date_range('2000-1-1',periods=3,freq='H')
idx.name = 'foo'
df = DataFrame(dict(A = Series(xrange(3), index=idx)))
df.to_hdf(path,'data',mode='w',append=True)
self.assert_(read_hdf(path,'data').index.name == 'foo')

idx2 = date_range('2001-1-1',periods=3,freq='H')
idx2.name = 'bar'
df2 = DataFrame(dict(A = Series(xrange(3), index=idx2)))
df2.to_hdf(path,'data',append=True)
self.assert_(read_hdf(path,'data').index.name is None)

warnings.filterwarnings('always', category=AttributeConflictWarning)

def test_panel_select(self):

Expand Down