diff --git a/RELEASE.rst b/RELEASE.rst index 981fa5bed257d..021b3e64e12f8 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -22,6 +22,12 @@ Where to get it * Binary installers on PyPI: http://pypi.python.org/pypi/pandas * Documentation: http://pandas.pydata.org + ``HDFStore`` + + - Fix an obscure ``PyTables`` error raised when using too many selectors in a ``where`` clause + - Provide dotted attribute access to ``get`` from stores (e.g. store.df == store['df']) + - Internally, change all variables to be private-like (they now have a leading underscore) + pandas 0.10.1 ============= diff --git a/doc/source/io.rst b/doc/source/io.rst index 2b0145dba5f24..100ca9e251234 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1021,6 +1021,9 @@ In a current or later Python session, you can retrieve stored objects: # store.get('df') is an equivalent method store['df'] + # dotted (attribute) access provides get as well + store.df + Deletion of the object specified by the key .. ipython:: python @@ -1363,6 +1366,32 @@ Notes & Caveats # we have provided a minimum minor_axis indexable size store.root.wp_big_strings.table +DataTypes +~~~~~~~~~ + +``HDFStore`` will map an ``object`` dtype to the underlying ``PyTables`` dtype. This means the following types are known to work: + + - floating : ``float64, float32, float16`` *(using* ``np.nan`` *to represent invalid values)* + - integer : ``int64, int32, int8, uint64, uint32, uint8`` + - bool + - datetime64[ns] *(using* ``NaT`` *to represent invalid values)* + - object : ``strings`` *(using* ``np.nan`` *to represent invalid values)* + +Currently, ``unicode`` and ``datetime`` columns (represented with a dtype of ``object``) **WILL FAIL**. In addition, even though a column may look like ``datetime64[ns]``, it **WILL FAIL** if it contains ``np.nan``. You can try to convert datetimelike columns to proper ``datetime64[ns]`` columns, which may contain ``NaT`` to represent invalid values. (Some of these issues have been addressed, and these conversions may not be necessary in future versions of pandas.) + .. ipython:: python + + import datetime + df = DataFrame(dict(datelike = Series([datetime.datetime(2001,1,1),datetime.datetime(2001,1,2),np.nan]))) + df + df.dtypes + + # to convert + df['datelike'] = Series(df['datelike'].values,dtype='M8[ns]') + df + df.dtypes + External Compatibility ~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/v0.10.2.txt b/doc/source/v0.10.2.txt new file mode 100644 index 0000000000000..e9fed5b36f3cd --- /dev/null +++ b/doc/source/v0.10.2.txt @@ -0,0 +1,18 @@ +.. _whatsnew_0102: + +v0.10.2 (February ??, 2013) +--------------------------- + +This is a minor release from 0.10.1 and includes many new features and +enhancements along with a large number of bug fixes. There are also a number of +important API changes that long-time pandas users should pay close attention +to. + +**Enhancements** + + - In ``HDFStore``, provide dotted attribute access to ``get`` from stores (e.g. store.df == store['df']) + +See the `full release notes +`__ or issue tracker +on GitHub for a complete list. + diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst index 6c125c45a2599..646610ecccd88 100644 --- a/doc/source/whatsnew.rst +++ b/doc/source/whatsnew.rst @@ -16,6 +16,8 @@ What's New These are new features and improvements of note in each release. +.. include:: v0.10.2.txt + .. include:: v0.10.1.txt ..
include:: v0.10.0.txt diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 78bd204f26993..1a00ff522ccda 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -197,19 +197,19 @@ def __init__(self, path, mode='a', complevel=None, complib=None, except ImportError: # pragma: no cover raise Exception('HDFStore requires PyTables') - self.path = path - self.mode = mode - self.handle = None - self.complevel = complevel - self.complib = complib - self.fletcher32 = fletcher32 - self.filters = None + self._path = path + self._mode = mode + self._handle = None + self._complevel = complevel + self._complib = complib + self._fletcher32 = fletcher32 + self._filters = None self.open(mode=mode, warn=False) @property def root(self): """ return the root node """ - return self.handle.root + return self._handle.root def __getitem__(self, key): return self.get(key) @@ -220,10 +220,19 @@ def __setitem__(self, key, value): def __delitem__(self, key): return self.remove(key) + def __getattr__(self, name): + """ allow dotted attribute access to stored objects (delegates to get) """ + try: + return self.get(name) + except: + pass + raise AttributeError("'%s' object has no attribute '%s'" % + (type(self).__name__, name)) + def __contains__(self, key): """ check for existence of this key; can match the exact pathname or the pathname w/o the leading '/' - """ + """ node = self.get_node(key) if node is not None: name = node._v_pathname @@ -234,7 +243,7 @@ def __len__(self): return len(self.groups()) def __repr__(self): - output = '%s\nFile path: %s\n' % (type(self), self.path) + output = '%s\nFile path: %s\n' % (type(self), self._path) if len(self.keys()): keys = [] @@ -277,7 +286,7 @@ def open(self, mode='a', warn=True): mode : {'a', 'w', 'r', 'r+'}, default 'a' See HDFStore docstring or tables.openFile for info about modes """ - self.mode = mode + self._mode = mode if warn and mode == 'w': # pragma: no cover while True: response = raw_input("Re-opening as mode='w' will delete the " @@ -286,22 +295,22 @@ break elif response == 'n': return - if self.handle is not None and self.handle.isopen: - self.handle.close() + if self._handle is not None and self._handle.isopen: + self._handle.close() - if self.complib is not None: - if self.complevel is None: - self.complevel = 9 - self.filters = _tables().Filters(self.complevel, - self.complib, - fletcher32=self.fletcher32) + if self._complib is not None: + if self._complevel is None: + self._complevel = 9 + self._filters = _tables().Filters(self._complevel, + self._complib, + fletcher32=self._fletcher32) try: - self.handle = h5_open(self.path, self.mode) + self._handle = h5_open(self._path, self._mode) except IOError, e: # pragma: no cover if 'can not be written' in str(e): - print 'Opening %s in read-only mode' % self.path - self.handle = h5_open(self.path, 'r') + print 'Opening %s in read-only mode' % self._path + self._handle = h5_open(self._path, 'r') else: raise @@ -309,13 +318,13 @@ def close(self): """ Close the PyTables file handle """ - self.handle.close() + self._handle.close() def flush(self): """ Force all buffered modifications to be written to disk """ - self.handle.flush() + self._handle.flush() def get(self, key): """ @@ -617,14 +626,14 @@ def create_table_index(self, key, **kwargs): def groups(self): """ return a list of all the top-level nodes (that are not themselves a pandas storage object) """ _tables() - return [ g for g in self.handle.walkNodes() if getattr(g._v_attrs,'pandas_type',None) or getattr(g,'table',None) or 
(isinstance(g,_table_mod.table.Table) and g._v_name != 'table') ] + return [ g for g in self._handle.walkNodes() if getattr(g._v_attrs,'pandas_type',None) or getattr(g,'table',None) or (isinstance(g,_table_mod.table.Table) and g._v_name != 'table') ] def get_node(self, key): """ return the node with the key or None if it does not exist """ try: if not key.startswith('/'): key = '/' + key - return self.handle.getNode(self.root, key) + return self._handle.getNode(self.root, key) except: return None @@ -751,7 +760,7 @@ def _write_to_group(self, key, value, index=True, table=False, append=False, com # remove the node if we are not appending if group is not None and not append: - self.handle.removeNode(group, recursive=True) + self._handle.removeNode(group, recursive=True) group = None if group is None: @@ -768,7 +777,7 @@ def _write_to_group(self, key, value, index=True, table=False, append=False, com new_path += p group = self.get_node(new_path) if group is None: - group = self.handle.createGroup(path, p) + group = self._handle.createGroup(path, p) path = new_path s = self._create_storer(group, value, table=table, append=append, **kwargs) @@ -1304,28 +1313,28 @@ def pathname(self): return self.group._v_pathname @property - def handle(self): - return self.parent.handle + def _handle(self): + return self.parent._handle @property def _quiet(self): return self.parent._quiet @property - def filters(self): - return self.parent.filters + def _filters(self): + return self.parent._filters @property - def complevel(self): - return self.parent.complevel + def _complevel(self): + return self.parent._complevel @property - def fletcher32(self): - return self.parent.fletcher32 + def _fletcher32(self): + return self.parent._fletcher32 @property - def complib(self): - return self.parent.complib + def _complib(self): + return self.parent._complib @property def attrs(self): @@ -1380,7 +1389,7 @@ def write(self, **kwargs): def delete(self, where = None, **kwargs): """ support fully deleting the node in its entirety (only) - where specification must be None """ if where is None: - self.handle.removeNode(self.group, recursive=True) + self._handle.removeNode(self.group, recursive=True) return None raise NotImplementedError("cannot delete on an abstract storer") @@ -1583,7 +1592,7 @@ def read_index_node(self, node): def write_array(self, key, value): if key in self.group: - self.handle.removeNode(self.group, key) + self._handle.removeNode(self.group, key) # Transform needed to interface with pytables row/col notation empty_array = any(x == 0 for x in value.shape) @@ -1593,7 +1602,7 @@ def write_array(self, key, value): value = value.T transposed = True - if self.filters is not None: + if self._filters is not None: atom = None try: # get the atom for this datatype @@ -1603,9 +1612,9 @@ def write_array(self, key, value): if atom is not None: # create an empty chunked array and fill it from value - ca = self.handle.createCArray(self.group, key, atom, + ca = self._handle.createCArray(self.group, key, atom, value.shape, - filters=self.filters) + filters=self._filters) ca[:] = value getattr(self.group, key)._v_attrs.transposed = transposed return @@ -1622,21 +1631,21 @@ def write_array(self, key, value): ws = performance_doc % (inferred_type,key) warnings.warn(ws, PerformanceWarning) - vlarr = self.handle.createVLArray(self.group, key, + vlarr = self._handle.createVLArray(self.group, key, _tables().ObjectAtom()) vlarr.append(value) elif value.dtype.type == np.datetime64: - self.handle.createArray(self.group, key, 
value.view('i8')) + self._handle.createArray(self.group, key, value.view('i8')) getattr(self.group, key)._v_attrs.value_type = 'datetime64' else: if empty_array: # ugly hack for length 0 axes arr = np.empty((1,) * value.ndim) - self.handle.createArray(self.group, key, arr) + self._handle.createArray(self.group, key, arr) getattr(self.group, key)._v_attrs.value_type = str(value.dtype) getattr(self.group, key)._v_attrs.shape = value.shape else: - self.handle.createArray(self.group, key, value) + self._handle.createArray(self.group, key, value) getattr(self.group, key)._v_attrs.transposed = transposed @@ -1729,7 +1738,7 @@ def write(self, obj, **kwargs): for name, ss in obj.iteritems(): key = 'sparse_series_%s' % name if key not in self.group._v_children: - node = self.handle.createGroup(self.group, key) + node = self._handle.createGroup(self.group, key) else: node = getattr(self.group, key) s = SparseSeriesStorer(self.parent, node) @@ -1763,7 +1772,7 @@ def write(self, obj, **kwargs): for name, sdf in obj.iteritems(): key = 'sparse_frame_%s' % name if key not in self.group._v_children: - node = self.handle.createGroup(self.group, key) + node = self._handle.createGroup(self.group, key) else: node = getattr(self.group, key) s = SparseFrameStorer(self.parent, node) @@ -2293,13 +2302,13 @@ def create_description(self, complib=None, complevel=None, fletcher32=False, exp if complib: if complevel is None: - complevel = self.complevel or 9 + complevel = self._complevel or 9 filters = _tables().Filters(complevel=complevel, complib=complib, - fletcher32=fletcher32 or self.fletcher32) + fletcher32=fletcher32 or self._fletcher32) d['filters'] = filters - elif self.filters is not None: - d['filters'] = self.filters + elif self._filters is not None: + d['filters'] = self._filters return d @@ -2484,7 +2493,7 @@ def write(self, obj, axes=None, append=False, complib=None, expectedrows=None, **kwargs): if not append and self.is_exists: - self.handle.removeNode(self.group, 'table') + self._handle.removeNode(self.group, 'table') # create the axes self.create_axes(axes=axes, obj=obj, validate=append, @@ -2502,7 +2511,7 @@ def write(self, obj, axes=None, append=False, complib=None, self.set_attrs() # create the table - table = self.handle.createTable(self.group, **options) + table = self._handle.createTable(self.group, **options) else: table = self.table @@ -2552,6 +2561,11 @@ def write_data(self, chunksize): def write_data_chunk(self, indexes, mask, search, values): + # 0 len + for v in values: + if not np.prod(v.shape): + return + # get our function try: func = getattr(lib, "create_hdf_rows_%sd" % self.ndim) @@ -2574,7 +2588,7 @@ def delete(self, where=None, **kwargs): # delete all rows (and return the nrows) if where is None or not len(where): nrows = self.nrows - self.handle.removeNode(self.group, recursive=True) + self._handle.removeNode(self.group, recursive=True) return nrows # infer the data kind @@ -2894,6 +2908,7 @@ class Term(object): _ops = ['<=', '<', '>=', '>', '!=', '==', '='] _search = re.compile("^\s*(?P\w+)\s*(?P%s)\s*(?P.+)\s*$" % '|'.join(_ops)) + _max_selectors = 31 def __init__(self, field, op=None, value=None, queryables=None): self.field = None @@ -3006,7 +3021,7 @@ def eval(self): if self.is_in_table: # too many values to create the expression? 
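# NOTE (editorial, hedged): the patch itself does not say where the value 31
# comes from; it is presumably chosen to keep the generated
# "(field == v0) | (field == v1) | ..." condition string under a
# PyTables/numexpr expression-size limit. Value lists longer than
# _max_selectors are expected to skip the condition-string path entirely,
# which is the "too many selectors" fix called out in the release notes.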
- if len(values) <= 61: + if len(values) <= self._max_selectors: self.condition = "(%s)" % ' | '.join( ["(%s == %s)" % (self.field, v[0]) for v in values]) @@ -3138,3 +3153,15 @@ def select_coords(self): return self.table.table.getWhereList(self.condition, start=self.start, stop=self.stop, sort=True) +### utilities ### + +def timeit(key,df,fn=None,remove=True,**kwargs): + """ append df to a scratch HDFStore under key (for quick benchmarking); remove the file afterwards unless remove=False """ + if fn is None: + fn = 'timeit.h5' + store = HDFStore(fn,mode='w') + store.append(key,df,**kwargs) + store.close() + + if remove: + import os + os.remove(fn) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 934e088ddc1d3..a4df428d60d90 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -27,236 +27,293 @@ _multiprocess_can_split_ = False +# helpers (and a contextmanager) to make sure stores are closed and scratch files removed +def safe_remove(path): + if path is not None: + import os + try: + os.remove(path) + except: + pass + +def safe_close(store): + try: + if store is not None: + store.close() + except: + pass + +from contextlib import contextmanager + +@contextmanager +def ensure_clean(path, mode='a', complevel=None, complib=None, + fletcher32=False): + store = HDFStore(path, mode=mode, complevel=complevel, + complib=complib, fletcher32=fletcher32) + try: + yield store + finally: + safe_close(store) + if mode == 'w' or mode == 'a': + safe_remove(path) + +# set these parameters so we don't have file sharing problems between threads +tables.parameters.MAX_NUMEXPR_THREADS = 1 +tables.parameters.MAX_BLOSC_THREADS = 1 +tables.parameters.MAX_THREADS = 1 + class TestHDFStore(unittest.TestCase): - scratchpath = '__scratch__.h5' def setUp(self): warnings.filterwarnings(action='ignore', category=FutureWarning) self.path = '__%s__.h5' % tm.rands(10) - self.store = HDFStore(self.path) def tearDown(self): - self.store.close() - try: - os.remove(self.path) - except os.error: - pass + pass def test_factory_fun(self): try: - with get_store(self.scratchpath) as tbl: + with get_store(self.path) as tbl: raise ValueError('blah') except ValueError: pass + finally: + safe_remove(self.path) - with get_store(self.scratchpath) as tbl: - tbl['a'] = tm.makeDataFrame() - - with get_store(self.scratchpath) as tbl: - self.assertEquals(len(tbl), 1) - self.assertEquals(type(tbl['a']), DataFrame) - - os.remove(self.scratchpath) + try: + with get_store(self.path) as tbl: + tbl['a'] = tm.makeDataFrame() + + with get_store(self.path) as tbl: + self.assertEquals(len(tbl), 1) + self.assertEquals(type(tbl['a']), DataFrame) + finally: + safe_remove(self.path) def test_keys(self): - self.store['a'] = tm.makeTimeSeries() - self.store['b'] = tm.makeStringSeries() - self.store['c'] = tm.makeDataFrame() - self.store['d'] = tm.makePanel() - self.store['foo/bar'] = tm.makePanel() - self.assertEquals(len(self.store), 5) - self.assert_(set( - self.store.keys()) == set(['/a', '/b', '/c', '/d', '/foo/bar'])) + + with ensure_clean(self.path) as store: + store['a'] = tm.makeTimeSeries() + store['b'] = tm.makeStringSeries() + store['c'] = tm.makeDataFrame() + store['d'] = tm.makePanel() + store['foo/bar'] = tm.makePanel() + self.assertEquals(len(store), 5) + self.assert_(set( + store.keys()) == set(['/a', '/b', '/c', '/d', '/foo/bar'])) def test_repr(self): - repr(self.store) - self.store['a'] = tm.makeTimeSeries() - self.store['b'] = tm.makeStringSeries() - self.store['c'] = tm.makeDataFrame() - self.store['d'] = tm.makePanel() - self.store['foo/bar'] = tm.makePanel() - self.store.append('e', tm.makePanel()) - df = tm.makeDataFrame() - df['obj1'] = 'foo' - df['obj2'] = 
'bar' - df['bool1'] = df['A'] > 0 - df['bool2'] = df['B'] > 0 - df['bool3'] = True - df['int1'] = 1 - df['int2'] = 2 - df['timestamp1'] = Timestamp('20010102') - df['timestamp2'] = Timestamp('20010103') - df['datetime1'] = datetime.datetime(2001,1,2,0,0) - df['datetime2'] = datetime.datetime(2001,1,3,0,0) - df.ix[3:6,['obj1']] = np.nan - df = df.consolidate().convert_objects() - self.store['df'] = df + with ensure_clean(self.path) as store: + repr(store) + store['a'] = tm.makeTimeSeries() + store['b'] = tm.makeStringSeries() + store['c'] = tm.makeDataFrame() + store['d'] = tm.makePanel() + store['foo/bar'] = tm.makePanel() + store.append('e', tm.makePanel()) - # make a random group in hdf space - self.store.handle.createGroup(self.store.handle.root,'bah') + df = tm.makeDataFrame() + df['obj1'] = 'foo' + df['obj2'] = 'bar' + df['bool1'] = df['A'] > 0 + df['bool2'] = df['B'] > 0 + df['bool3'] = True + df['int1'] = 1 + df['int2'] = 2 + df['timestamp1'] = Timestamp('20010102') + df['timestamp2'] = Timestamp('20010103') + df['datetime1'] = datetime.datetime(2001,1,2,0,0) + df['datetime2'] = datetime.datetime(2001,1,3,0,0) + df.ix[3:6,['obj1']] = np.nan + df = df.consolidate().convert_objects() + store['df'] = df + + # make a random group in hdf space + store._handle.createGroup(store._handle.root,'bah') - repr(self.store) - str(self.store) + repr(store) + str(store) def test_contains(self): - self.store['a'] = tm.makeTimeSeries() - self.store['b'] = tm.makeDataFrame() - self.store['foo/bar'] = tm.makeDataFrame() - self.assert_('a' in self.store) - self.assert_('b' in self.store) - self.assert_('c' not in self.store) - self.assert_('foo/bar' in self.store) - self.assert_('/foo/bar' in self.store) - self.assert_('/foo/b' not in self.store) - self.assert_('bar' not in self.store) - - # GH 2694 - warnings.filterwarnings('ignore', category=tables.NaturalNameWarning) - self.store['node())'] = tm.makeDataFrame() - self.assert_('node())' in self.store) - warnings.filterwarnings('always', category=tables.NaturalNameWarning) + + with ensure_clean(self.path) as store: + store['a'] = tm.makeTimeSeries() + store['b'] = tm.makeDataFrame() + store['foo/bar'] = tm.makeDataFrame() + self.assert_('a' in store) + self.assert_('b' in store) + self.assert_('c' not in store) + self.assert_('foo/bar' in store) + self.assert_('/foo/bar' in store) + self.assert_('/foo/b' not in store) + self.assert_('bar' not in store) + + # GH 2694 + warnings.filterwarnings('ignore', category=tables.NaturalNameWarning) + store['node())'] = tm.makeDataFrame() + self.assert_('node())' in store) + warnings.filterwarnings('always', category=tables.NaturalNameWarning) def test_versioning(self): - self.store['a'] = tm.makeTimeSeries() - self.store['b'] = tm.makeDataFrame() - df = tm.makeTimeDataFrame() - self.store.remove('df1') - self.store.append('df1', df[:10]) - self.store.append('df1', df[10:]) - self.assert_(self.store.root.a._v_attrs.pandas_version == '0.10.1') - self.assert_(self.store.root.b._v_attrs.pandas_version == '0.10.1') - self.assert_(self.store.root.df1._v_attrs.pandas_version == '0.10.1') - - # write a file and wipe its versioning - self.store.remove('df2') - self.store.append('df2', df) - - # this is an error because its table_type is appendable, but no version - # info - self.store.get_node('df2')._v_attrs.pandas_version = None - self.assertRaises(Exception, self.store.select, 'df2') - - def test_meta(self): - raise nose.SkipTest('no meta') - - meta = {'foo': ['I love pandas ']} - s = tm.makeTimeSeries() - s.meta = meta 
- self.store['a'] = s - self.assert_(self.store['a'].meta == meta) - df = tm.makeDataFrame() - df.meta = meta - self.store['b'] = df - self.assert_(self.store['b'].meta == meta) - - # this should work, but because slicing doesn't propgate meta it doesn - self.store.remove('df1') - self.store.append('df1', df[:10]) - self.store.append('df1', df[10:]) - results = self.store['df1'] - # self.assert_(getattr(results,'meta',None) == meta) - - # no meta - df = tm.makeDataFrame() - self.store['b'] = df - self.assert_(hasattr(self.store['b'], 'meta') is False) + with ensure_clean(self.path) as store: + store['a'] = tm.makeTimeSeries() + store['b'] = tm.makeDataFrame() + df = tm.makeTimeDataFrame() + store.remove('df1') + store.append('df1', df[:10]) + store.append('df1', df[10:]) + self.assert_(store.root.a._v_attrs.pandas_version == '0.10.1') + self.assert_(store.root.b._v_attrs.pandas_version == '0.10.1') + self.assert_(store.root.df1._v_attrs.pandas_version == '0.10.1') + + # write a file and wipe its versioning + store.remove('df2') + store.append('df2', df) + + # this is an error because its table_type is appendable, but no version + # info + store.get_node('df2')._v_attrs.pandas_version = None + self.assertRaises(Exception, store.select, 'df2') def test_reopen_handle(self): - self.store['a'] = tm.makeTimeSeries() - self.store.open('w', warn=False) - self.assert_(self.store.handle.isopen) - self.assertEquals(len(self.store), 0) + with ensure_clean(self.path) as store: + store['a'] = tm.makeTimeSeries() + store.open('w', warn=False) + self.assert_(store._handle.isopen) + self.assertEquals(len(store), 0) + def test_flush(self): - self.store['a'] = tm.makeTimeSeries() - self.store.flush() + + with ensure_clean(self.path) as store: + store['a'] = tm.makeTimeSeries() + store.flush() def test_get(self): - self.store['a'] = tm.makeTimeSeries() - left = self.store.get('a') - right = self.store['a'] - tm.assert_series_equal(left, right) - left = self.store.get('/a') - right = self.store['/a'] - tm.assert_series_equal(left, right) + with ensure_clean(self.path) as store: + store['a'] = tm.makeTimeSeries() + left = store.get('a') + right = store['a'] + tm.assert_series_equal(left, right) + + left = store.get('/a') + right = store['/a'] + tm.assert_series_equal(left, right) + + self.assertRaises(KeyError, store.get, 'b') - self.assertRaises(KeyError, self.store.get, 'b') + def test_getattr(self): - def test_put(self): - ts = tm.makeTimeSeries() - df = tm.makeTimeDataFrame() - self.store['a'] = ts - self.store['b'] = df[:10] - self.store['foo/bar/bah'] = df[:10] - self.store['foo'] = df[:10] - self.store['/foo'] = df[:10] - self.store.put('c', df[:10], table=True) + with ensure_clean(self.path) as store: - # not OK, not a table - self.assertRaises( - ValueError, self.store.put, 'b', df[10:], append=True) + s = tm.makeTimeSeries() + store['a'] = s - # node does not currently exist, test _is_table_type returns False in - # this case - #self.store.remove('f') - #self.assertRaises(ValueError, self.store.put, 'f', df[10:], append=True) + # test attribute access + result = store.a + tm.assert_series_equal(result, s) + result = getattr(store,'a') + tm.assert_series_equal(result, s) - # can't put to a table (use append instead) - self.assertRaises(ValueError, self.store.put, 'c', df[10:], append=True) + df = tm.makeTimeDataFrame() + store['df'] = df + result = store.df + tm.assert_frame_equal(result, df) - # overwrite table - self.store.put('c', df[:10], table=True, append=False) - tm.assert_frame_equal(df[:10], 
self.store['c']) + # errors + self.assertRaises(AttributeError, getattr, store, 'd') - def test_put_string_index(self): + for x in ['mode','path','handle','complib']: + self.assertRaises(AttributeError, getattr, store, x) - index = Index( - ["I am a very long string index: %s" % i for i in range(20)]) - s = Series(np.arange(20), index=index) - df = DataFrame({'A': s, 'B': s}) + # not stores + for x in ['mode','path','handle','complib']: + getattr(store,"_%s" % x) - self.store['a'] = s - tm.assert_series_equal(self.store['a'], s) + def test_put(self): - self.store['b'] = df - tm.assert_frame_equal(self.store['b'], df) + with ensure_clean(self.path) as store: + + ts = tm.makeTimeSeries() + df = tm.makeTimeDataFrame() + store['a'] = ts + store['b'] = df[:10] + store['foo/bar/bah'] = df[:10] + store['foo'] = df[:10] + store['/foo'] = df[:10] + store.put('c', df[:10], table=True) + + # not OK, not a table + self.assertRaises( + ValueError, store.put, 'b', df[10:], append=True) + + # node does not currently exist, test _is_table_type returns False in + # this case + # store.remove('f') + # self.assertRaises(ValueError, store.put, 'f', df[10:], append=True) + + # can't put to a table (use append instead) + self.assertRaises(ValueError, store.put, 'c', df[10:], append=True) + + # overwrite table + store.put('c', df[:10], table=True, append=False) + tm.assert_frame_equal(df[:10], store['c']) - # mixed length - index = Index(['abcdefghijklmnopqrstuvwxyz1234567890'] + ["I am a very long string index: %s" % i for i in range(20)]) - s = Series(np.arange(21), index=index) - df = DataFrame({'A': s, 'B': s}) - self.store['a'] = s - tm.assert_series_equal(self.store['a'], s) + def test_put_string_index(self): - self.store['b'] = df - tm.assert_frame_equal(self.store['b'], df) + with ensure_clean(self.path) as store: + + index = Index( + ["I am a very long string index: %s" % i for i in range(20)]) + s = Series(np.arange(20), index=index) + df = DataFrame({'A': s, 'B': s}) + + store['a'] = s + tm.assert_series_equal(store['a'], s) + + store['b'] = df + tm.assert_frame_equal(store['b'], df) + + # mixed length + index = Index(['abcdefghijklmnopqrstuvwxyz1234567890'] + ["I am a very long string index: %s" % i for i in range(20)]) + s = Series(np.arange(21), index=index) + df = DataFrame({'A': s, 'B': s}) + store['a'] = s + tm.assert_series_equal(store['a'], s) + + store['b'] = df + tm.assert_frame_equal(store['b'], df) def test_put_compression(self): - df = tm.makeTimeDataFrame() - self.store.put('c', df, table=True, complib='zlib') - tm.assert_frame_equal(self.store['c'], df) + with ensure_clean(self.path) as store: + df = tm.makeTimeDataFrame() - # can't compress if table=False - self.assertRaises(ValueError, self.store.put, 'b', df, - table=False, complib='zlib') + store.put('c', df, table=True, complib='zlib') + tm.assert_frame_equal(store['c'], df) + + # can't compress if table=False + self.assertRaises(ValueError, store.put, 'b', df, + table=False, complib='zlib') def test_put_compression_blosc(self): tm.skip_if_no_package('tables', '2.2', app='blosc support') df = tm.makeTimeDataFrame() - # can't compress if table=False - self.assertRaises(ValueError, self.store.put, 'b', df, - table=False, complib='blosc') - - self.store.put('c', df, table=True, complib='blosc') - tm.assert_frame_equal(self.store['c'], df) + with ensure_clean(self.path) as store: + # can't compress if table=False + self.assertRaises(ValueError, store.put, 'b', df, + table=False, complib='blosc') + + store.put('c', df, table=True, 
complib='blosc') + tm.assert_frame_equal(store['c'], df) + def test_put_integer(self): # non-date, non-string index df = DataFrame(np.random.randn(50, 100)) @@ -277,432 +334,447 @@ def test_put_mixed_type(self): df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0) df.ix[3:6, ['obj1']] = np.nan df = df.consolidate().convert_objects() - self.store.remove('df') - warnings.filterwarnings('ignore', category=PerformanceWarning) - self.store.put('df',df) - expected = self.store.get('df') - tm.assert_frame_equal(expected,df) - warnings.filterwarnings('always', category=PerformanceWarning) + with ensure_clean(self.path) as store: + store.remove('df') + warnings.filterwarnings('ignore', category=PerformanceWarning) + store.put('df',df) + expected = store.get('df') + tm.assert_frame_equal(expected,df) + warnings.filterwarnings('always', category=PerformanceWarning) + def test_append(self): - df = tm.makeTimeDataFrame() - self.store.remove('df1') - self.store.append('df1', df[:10]) - self.store.append('df1', df[10:]) - tm.assert_frame_equal(self.store['df1'], df) - - self.store.remove('df2') - self.store.put('df2', df[:10], table=True) - self.store.append('df2', df[10:]) - tm.assert_frame_equal(self.store['df2'], df) - - self.store.remove('df3') - self.store.append('/df3', df[:10]) - self.store.append('/df3', df[10:]) - tm.assert_frame_equal(self.store['df3'], df) - - # this is allowed by almost always don't want to do it - warnings.filterwarnings('ignore', category=tables.NaturalNameWarning) - self.store.remove('/df3 foo') - self.store.append('/df3 foo', df[:10]) - self.store.append('/df3 foo', df[10:]) - tm.assert_frame_equal(self.store['df3 foo'], df) - warnings.filterwarnings('always', category=tables.NaturalNameWarning) - - # panel - wp = tm.makePanel() - self.store.remove('wp1') - self.store.append('wp1', wp.ix[:, :10, :]) - self.store.append('wp1', wp.ix[:, 10:, :]) - tm.assert_panel_equal(self.store['wp1'], wp) - - # ndim - p4d = tm.makePanel4D() - self.store.remove('p4d') - self.store.append('p4d', p4d.ix[:, :, :10, :]) - self.store.append('p4d', p4d.ix[:, :, 10:, :]) - tm.assert_panel4d_equal(self.store['p4d'], p4d) - - # test using axis labels - self.store.remove('p4d') - self.store.append('p4d', p4d.ix[:, :, :10, :], axes=[ - 'items', 'major_axis', 'minor_axis']) - self.store.append('p4d', p4d.ix[:, :, 10:, :], axes=[ - 'items', 'major_axis', 'minor_axis']) - tm.assert_panel4d_equal(self.store['p4d'], p4d) - - # test using differnt number of items on each axis - p4d2 = p4d.copy() - p4d2['l4'] = p4d['l1'] - p4d2['l5'] = p4d['l1'] - self.store.remove('p4d2') - self.store.append( - 'p4d2', p4d2, axes=['items', 'major_axis', 'minor_axis']) - tm.assert_panel4d_equal(self.store['p4d2'], p4d2) - - # test using differt order of items on the non-index axes - self.store.remove('wp1') - wp_append1 = wp.ix[:, :10, :] - self.store.append('wp1', wp_append1) - wp_append2 = wp.ix[:, 10:, :].reindex(items=wp.items[::-1]) - self.store.append('wp1', wp_append2) - tm.assert_panel_equal(self.store['wp1'], wp) - - # dtype issues - mizxed type in a single object column - df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]]) - df['mixed_column'] = 'testing' - df.ix[2, 'mixed_column'] = np.nan - self.store.remove('df') - self.store.append('df', df) - tm.assert_frame_equal(self.store['df'], df) + with ensure_clean(self.path) as store: + df = tm.makeTimeDataFrame() + store.remove('df1') + store.append('df1', df[:10]) + store.append('df1', df[10:]) + tm.assert_frame_equal(store['df1'], df) + + store.remove('df2') + 
store.put('df2', df[:10], table=True) + store.append('df2', df[10:]) + tm.assert_frame_equal(store['df2'], df) + + store.remove('df3') + store.append('/df3', df[:10]) + store.append('/df3', df[10:]) + tm.assert_frame_equal(store['df3'], df) + + # this is allowed, but you almost always don't want to do it + warnings.filterwarnings('ignore', category=tables.NaturalNameWarning) + store.remove('/df3 foo') + store.append('/df3 foo', df[:10]) + store.append('/df3 foo', df[10:]) + tm.assert_frame_equal(store['df3 foo'], df) + warnings.filterwarnings('always', category=tables.NaturalNameWarning) + + # panel + wp = tm.makePanel() + store.remove('wp1') + store.append('wp1', wp.ix[:, :10, :]) + store.append('wp1', wp.ix[:, 10:, :]) + tm.assert_panel_equal(store['wp1'], wp) + + # ndim + p4d = tm.makePanel4D() + store.remove('p4d') + store.append('p4d', p4d.ix[:, :, :10, :]) + store.append('p4d', p4d.ix[:, :, 10:, :]) + tm.assert_panel4d_equal(store['p4d'], p4d) + + # test using axis labels + store.remove('p4d') + store.append('p4d', p4d.ix[:, :, :10, :], axes=[ + 'items', 'major_axis', 'minor_axis']) + store.append('p4d', p4d.ix[:, :, 10:, :], axes=[ + 'items', 'major_axis', 'minor_axis']) + tm.assert_panel4d_equal(store['p4d'], p4d) + + # test using a different number of items on each axis + p4d2 = p4d.copy() + p4d2['l4'] = p4d['l1'] + p4d2['l5'] = p4d['l1'] + store.remove('p4d2') + store.append( + 'p4d2', p4d2, axes=['items', 'major_axis', 'minor_axis']) + tm.assert_panel4d_equal(store['p4d2'], p4d2) + + # test using a different order of items on the non-index axes + store.remove('wp1') + wp_append1 = wp.ix[:, :10, :] + store.append('wp1', wp_append1) + wp_append2 = wp.ix[:, 10:, :].reindex(items=wp.items[::-1]) + store.append('wp1', wp_append2) + tm.assert_panel_equal(store['wp1'], wp) + + # dtype issues - mixed type in a single object column + df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]]) + df['mixed_column'] = 'testing' + df.ix[2, 'mixed_column'] = np.nan + store.remove('df') + store.append('df', df) + tm.assert_frame_equal(store['df'], df) def test_append_frame_column_oriented(self): - # column oriented - df = tm.makeTimeDataFrame() - self.store.remove('df1') - self.store.append('df1', df.ix[:, :2], axes=['columns']) - self.store.append('df1', df.ix[:, 2:]) - tm.assert_frame_equal(self.store['df1'], df) - - result = self.store.select('df1', 'columns=A') - expected = df.reindex(columns=['A']) - tm.assert_frame_equal(expected, result) - - # this isn't supported - self.assertRaises(Exception, self.store.select, 'df1', ( - 'columns=A', Term('index', '>', df.index[4]))) - - # selection on the non-indexable - result = self.store.select( - 'df1', ('columns=A', Term('index', '=', df.index[0:4]))) - expected = df.reindex(columns=['A'], index=df.index[0:4]) - tm.assert_frame_equal(expected, result) + with ensure_clean(self.path) as store: + # column oriented + df = tm.makeTimeDataFrame() + store.remove('df1') + store.append('df1', df.ix[:, :2], axes=['columns']) + store.append('df1', df.ix[:, 2:]) + tm.assert_frame_equal(store['df1'], df) + + result = store.select('df1', 'columns=A') + expected = df.reindex(columns=['A']) + tm.assert_frame_equal(expected, result) + + # this isn't supported + self.assertRaises(Exception, store.select, 'df1', ( + 'columns=A', Term('index', '>', df.index[4]))) + + # selection on the non-indexable + result = store.select( + 'df1', ('columns=A', Term('index', '=', df.index[0:4]))) + expected = df.reindex(columns=['A'], index=df.index[0:4]) + tm.assert_frame_equal(expected, result)
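# NOTE (editorial sketch, not part of the original patch): every rewritten
# test above follows the same pattern -- the store is owned by the
# ensure_clean contextmanager defined at the top of this file, so the HDF5
# scratch file is closed and removed even when an assertion fails mid-test.
# A minimal usage sketch; the '__example__.h5' path is an illustrative
# assumption:
with ensure_clean('__example__.h5') as store:
    df = tm.makeDataFrame()
    store['df'] = df                      # __setitem__ delegates to put
    tm.assert_frame_equal(store.df, df)   # new dotted access == store['df']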
def test_ndim_indexables(self): """ test using ndim tables in new ways""" - p4d = tm.makePanel4D() - - def check_indexers(key, indexers): - for i, idx in enumerate(indexers): - self.assert_(getattr(getattr( - self.store.root, key).table.description, idx)._v_pos == i) - - # append then change (will take existing schema) - indexers = ['items', 'major_axis', 'minor_axis'] - - self.store.remove('p4d') - self.store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers) - self.store.append('p4d', p4d.ix[:, :, 10:, :]) - tm.assert_panel4d_equal(self.store.select('p4d'), p4d) - check_indexers('p4d', indexers) - - # same as above, but try to append with differnt axes - self.store.remove('p4d') - self.store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers) - self.store.append('p4d', p4d.ix[:, :, 10:, :], axes=[ - 'labels', 'items', 'major_axis']) - tm.assert_panel4d_equal(self.store.select('p4d'), p4d) - check_indexers('p4d', indexers) - - # pass incorrect number of axes - self.store.remove('p4d') - self.assertRaises(Exception, self.store.append, 'p4d', p4d.ix[ - :, :, :10, :], axes=['major_axis', 'minor_axis']) - - # different than default indexables #1 - indexers = ['labels', 'major_axis', 'minor_axis'] - self.store.remove('p4d') - self.store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers) - self.store.append('p4d', p4d.ix[:, :, 10:, :]) - tm.assert_panel4d_equal(self.store['p4d'], p4d) - check_indexers('p4d', indexers) - - # different than default indexables #2 - indexers = ['major_axis', 'labels', 'minor_axis'] - self.store.remove('p4d') - self.store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers) - self.store.append('p4d', p4d.ix[:, :, 10:, :]) - tm.assert_panel4d_equal(self.store['p4d'], p4d) - check_indexers('p4d', indexers) - - # partial selection - result = self.store.select('p4d', ['labels=l1']) - expected = p4d.reindex(labels=['l1']) - tm.assert_panel4d_equal(result, expected) - - # partial selection2 - result = self.store.select('p4d', [Term( - 'labels=l1'), Term('items=ItemA'), Term('minor_axis=B')]) - expected = p4d.reindex( - labels=['l1'], items=['ItemA'], minor_axis=['B']) - tm.assert_panel4d_equal(result, expected) - - # non-existant partial selection - result = self.store.select('p4d', [Term( - 'labels=l1'), Term('items=Item1'), Term('minor_axis=B')]) - expected = p4d.reindex(labels=['l1'], items=[], minor_axis=['B']) - tm.assert_panel4d_equal(result, expected) + with ensure_clean(self.path) as store: + + p4d = tm.makePanel4D() + + def check_indexers(key, indexers): + for i, idx in enumerate(indexers): + self.assert_(getattr(getattr( + store.root, key).table.description, idx)._v_pos == i) + + # append then change (will take existing schema) + indexers = ['items', 'major_axis', 'minor_axis'] + + store.remove('p4d') + store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers) + store.append('p4d', p4d.ix[:, :, 10:, :]) + tm.assert_panel4d_equal(store.select('p4d'), p4d) + check_indexers('p4d', indexers) + + # same as above, but try to append with different axes + store.remove('p4d') + store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers) + store.append('p4d', p4d.ix[:, :, 10:, :], axes=[ + 'labels', 'items', 'major_axis']) + tm.assert_panel4d_equal(store.select('p4d'), p4d) + check_indexers('p4d', indexers) + + # pass incorrect number of axes + store.remove('p4d') + self.assertRaises(Exception, store.append, 'p4d', p4d.ix[ + :, :, :10, :], axes=['major_axis', 'minor_axis']) + + # different than default indexables #1 + indexers = ['labels', 'major_axis', 'minor_axis'] + 
store.remove('p4d') + store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers) + store.append('p4d', p4d.ix[:, :, 10:, :]) + tm.assert_panel4d_equal(store['p4d'], p4d) + check_indexers('p4d', indexers) + + # different than default indexables #2 + indexers = ['major_axis', 'labels', 'minor_axis'] + store.remove('p4d') + store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers) + store.append('p4d', p4d.ix[:, :, 10:, :]) + tm.assert_panel4d_equal(store['p4d'], p4d) + check_indexers('p4d', indexers) + + # partial selection + result = store.select('p4d', ['labels=l1']) + expected = p4d.reindex(labels=['l1']) + tm.assert_panel4d_equal(result, expected) + + # partial selection2 + result = store.select('p4d', [Term( + 'labels=l1'), Term('items=ItemA'), Term('minor_axis=B')]) + expected = p4d.reindex( + labels=['l1'], items=['ItemA'], minor_axis=['B']) + tm.assert_panel4d_equal(result, expected) + + # non-existent partial selection + result = store.select('p4d', [Term( + 'labels=l1'), Term('items=Item1'), Term('minor_axis=B')]) + expected = p4d.reindex(labels=['l1'], items=[], minor_axis=['B']) + tm.assert_panel4d_equal(result, expected) def test_append_with_strings(self): - wp = tm.makePanel() - wp2 = wp.rename_axis( - dict([(x, "%s_extra" % x) for x in wp.minor_axis]), axis=2) - - def check_col(key,name,size): - self.assert_(getattr(self.store.get_storer(key).table.description,name).itemsize == size) - - self.store.append('s1', wp, min_itemsize=20) - self.store.append('s1', wp2) - expected = concat([wp, wp2], axis=2) - expected = expected.reindex(minor_axis=sorted(expected.minor_axis)) - tm.assert_panel_equal(self.store['s1'], expected) - check_col('s1', 'minor_axis', 20) - - # test dict format - self.store.append('s2', wp, min_itemsize={'minor_axis': 20}) - self.store.append('s2', wp2) - expected = concat([wp, wp2], axis=2) - expected = expected.reindex(minor_axis=sorted(expected.minor_axis)) - tm.assert_panel_equal(self.store['s2'], expected) - check_col('s2', 'minor_axis', 20) - - # apply the wrong field (similar to #1) - self.store.append('s3', wp, min_itemsize={'major_axis': 20}) - self.assertRaises(Exception, self.store.append, 's3') - - # test truncation of bigger strings - self.store.append('s4', wp) - self.assertRaises(Exception, self.store.append, 's4', wp2) - - # avoid truncation on elements - df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']]) - self.store.append('df_big', df) - tm.assert_frame_equal(self.store.select('df_big'), df) - check_col('df_big', 'values_block_1', 15) - - # appending smaller string ok - df2 = DataFrame([[124, 'asdqy'], [346, 'dggnhefbdfb']]) - self.store.append('df_big', df2) - expected = concat([df, df2]) - tm.assert_frame_equal(self.store.select('df_big'), expected) - check_col('df_big', 'values_block_1', 15) - - # avoid truncation on elements - df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']]) - self.store.append('df_big2', df, min_itemsize={'values': 50}) - tm.assert_frame_equal(self.store.select('df_big2'), df) - check_col('df_big2', 'values_block_1', 50) - - # bigger string on next append - self.store.append('df_new', df) - df_new = DataFrame( - [[124, 'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']]) - self.assertRaises(Exception, self.store.append, 'df_new', df_new) - - # with nans - self.store.remove('df') - df = tm.makeTimeDataFrame() - df['string'] = 'foo' - df.ix[1:4, 'string'] = np.nan - df['string2'] = 'bar' - df.ix[4:8, 'string2'] = np.nan - df['string3'] = 'bah' - df.ix[1:, 'string3'] = np.nan - 
self.store.append('df', df) - result = self.store.select('df') - tm.assert_frame_equal(result, df) - def test_append_with_data_columns(self): + with ensure_clean(self.path) as store: + wp = tm.makePanel() + wp2 = wp.rename_axis( + dict([(x, "%s_extra" % x) for x in wp.minor_axis]), axis=2) + + def check_col(key,name,size): + self.assert_(getattr(store.get_storer(key).table.description,name).itemsize == size) + + store.append('s1', wp, min_itemsize=20) + store.append('s1', wp2) + expected = concat([wp, wp2], axis=2) + expected = expected.reindex(minor_axis=sorted(expected.minor_axis)) + tm.assert_panel_equal(store['s1'], expected) + check_col('s1', 'minor_axis', 20) + + # test dict format + store.append('s2', wp, min_itemsize={'minor_axis': 20}) + store.append('s2', wp2) + expected = concat([wp, wp2], axis=2) + expected = expected.reindex(minor_axis=sorted(expected.minor_axis)) + tm.assert_panel_equal(store['s2'], expected) + check_col('s2', 'minor_axis', 20) + + # apply the wrong field (similar to #1) + store.append('s3', wp, min_itemsize={'major_axis': 20}) + self.assertRaises(Exception, store.append, 's3') + + # test truncation of bigger strings + store.append('s4', wp) + self.assertRaises(Exception, store.append, 's4', wp2) + + # avoid truncation on elements + df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']]) + store.append('df_big', df) + tm.assert_frame_equal(store.select('df_big'), df) + check_col('df_big', 'values_block_1', 15) + + # appending smaller string ok + df2 = DataFrame([[124, 'asdqy'], [346, 'dggnhefbdfb']]) + store.append('df_big', df2) + expected = concat([df, df2]) + tm.assert_frame_equal(store.select('df_big'), expected) + check_col('df_big', 'values_block_1', 15) + + # avoid truncation on elements + df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']]) + store.append('df_big2', df, min_itemsize={'values': 50}) + tm.assert_frame_equal(store.select('df_big2'), df) + check_col('df_big2', 'values_block_1', 50) + + # bigger string on next append + store.append('df_new', df) + df_new = DataFrame( + [[124, 'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']]) + self.assertRaises(Exception, store.append, 'df_new', df_new) + + # with nans + store.remove('df') + df = tm.makeTimeDataFrame() + df['string'] = 'foo' + df.ix[1:4, 'string'] = np.nan + df['string2'] = 'bar' + df.ix[4:8, 'string2'] = np.nan + df['string3'] = 'bah' + df.ix[1:, 'string3'] = np.nan + store.append('df', df) + result = store.select('df') + tm.assert_frame_equal(result, df) - df = tm.makeTimeDataFrame() - self.store.remove('df') - self.store.append('df', df[:2], data_columns=['B']) - self.store.append('df', df[2:]) - tm.assert_frame_equal(self.store['df'], df) - - # check that we have indicies created - assert(self.store.handle.root.df.table.cols.index.is_indexed is True) - assert(self.store.handle.root.df.table.cols.B.is_indexed is True) - - # data column searching - result = self.store.select('df', [Term('B>0')]) - expected = df[df.B > 0] - tm.assert_frame_equal(result, expected) - - # data column searching (with an indexable and a data_columns) - result = self.store.select( - 'df', [Term('B>0'), Term('index', '>', df.index[3])]) - df_new = df.reindex(index=df.index[4:]) - expected = df_new[df_new.B > 0] - tm.assert_frame_equal(result, expected) - - # data column selection with a string data_column - df_new = df.copy() - df_new['string'] = 'foo' - df_new['string'][1:4] = np.nan - df_new['string'][5:6] = 'bar' - self.store.remove('df') - self.store.append('df', df_new, 
data_columns=['string']) - result = self.store.select('df', [Term('string', '=', 'foo')]) - expected = df_new[df_new.string == 'foo'] - tm.assert_frame_equal(result, expected) - - # using min_itemsize and a data column - def check_col(key,name,size): - self.assert_(getattr(self.store.get_storer(key).table.description,name).itemsize == size) - - self.store.remove('df') - self.store.append('df', df_new, data_columns=['string'], - min_itemsize={'string': 30}) - check_col('df', 'string', 30) - self.store.remove('df') - self.store.append( - 'df', df_new, data_columns=['string'], min_itemsize=30) - check_col('df', 'string', 30) - self.store.remove('df') - self.store.append('df', df_new, data_columns=['string'], - min_itemsize={'values': 30}) - check_col('df', 'string', 30) - - df_new['string2'] = 'foobarbah' - df_new['string_block1'] = 'foobarbah1' - df_new['string_block2'] = 'foobarbah2' - self.store.remove('df') - self.store.append('df', df_new, data_columns=['string', 'string2'], min_itemsize={'string': 30, 'string2': 40, 'values': 50}) - check_col('df', 'string', 30) - check_col('df', 'string2', 40) - check_col('df', 'values_block_1', 50) - - # multiple data columns - df_new = df.copy() - df_new['string'] = 'foo' - df_new['string'][1:4] = np.nan - df_new['string'][5:6] = 'bar' - df_new['string2'] = 'foo' - df_new['string2'][2:5] = np.nan - df_new['string2'][7:8] = 'bar' - self.store.remove('df') - self.store.append( - 'df', df_new, data_columns=['A', 'B', 'string', 'string2']) - result = self.store.select('df', [Term('string', '=', 'foo'), Term( - 'string2=foo'), Term('A>0'), Term('B<0')]) - expected = df_new[(df_new.string == 'foo') & ( - df_new.string2 == 'foo') & (df_new.A > 0) & (df_new.B < 0)] - tm.assert_frame_equal(result, expected) - - # yield an empty frame - result = self.store.select('df', [Term('string', '=', 'foo'), Term( - 'string2=bar'), Term('A>0'), Term('B<0')]) - expected = df_new[(df_new.string == 'foo') & ( - df_new.string2 == 'bar') & (df_new.A > 0) & (df_new.B < 0)] - tm.assert_frame_equal(result, expected) - - # doc example - df_dc = df.copy() - df_dc['string'] = 'foo' - df_dc.ix[4:6, 'string'] = np.nan - df_dc.ix[7:9, 'string'] = 'bar' - df_dc['string2'] = 'cool' - df_dc['datetime'] = Timestamp('20010102') - df_dc = df_dc.convert_objects() - df_dc.ix[3:5, ['A', 'B', 'datetime']] = np.nan - - self.store.remove('df_dc') - self.store.append('df_dc', df_dc, data_columns=['B', 'C', - 'string', 'string2', 'datetime']) - result = self.store.select('df_dc', [Term('B>0')]) - - expected = df_dc[df_dc.B > 0] - tm.assert_frame_equal(result, expected) - - result = self.store.select( - 'df_dc', ['B > 0', 'C > 0', 'string == foo']) - expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & ( - df_dc.string == 'foo')] - tm.assert_frame_equal(result, expected) + def test_append_with_data_columns(self): + with ensure_clean(self.path) as store: + df = tm.makeTimeDataFrame() + store.remove('df') + store.append('df', df[:2], data_columns=['B']) + store.append('df', df[2:]) + tm.assert_frame_equal(store['df'], df) + + # check that we have indicies created + assert(store._handle.root.df.table.cols.index.is_indexed is True) + assert(store._handle.root.df.table.cols.B.is_indexed is True) + + # data column searching + result = store.select('df', [Term('B>0')]) + expected = df[df.B > 0] + tm.assert_frame_equal(result, expected) + + # data column searching (with an indexable and a data_columns) + result = store.select( + 'df', [Term('B>0'), Term('index', '>', df.index[3])]) + df_new = 
df.reindex(index=df.index[4:]) + expected = df_new[df_new.B > 0] + tm.assert_frame_equal(result, expected) + + # data column selection with a string data_column + df_new = df.copy() + df_new['string'] = 'foo' + df_new['string'][1:4] = np.nan + df_new['string'][5:6] = 'bar' + store.remove('df') + store.append('df', df_new, data_columns=['string']) + result = store.select('df', [Term('string', '=', 'foo')]) + expected = df_new[df_new.string == 'foo'] + tm.assert_frame_equal(result, expected) + + # using min_itemsize and a data column + def check_col(key,name,size): + self.assert_(getattr(store.get_storer(key).table.description,name).itemsize == size) + + with ensure_clean(self.path) as store: + store.remove('df') + store.append('df', df_new, data_columns=['string'], + min_itemsize={'string': 30}) + check_col('df', 'string', 30) + store.remove('df') + store.append( + 'df', df_new, data_columns=['string'], min_itemsize=30) + check_col('df', 'string', 30) + store.remove('df') + store.append('df', df_new, data_columns=['string'], + min_itemsize={'values': 30}) + check_col('df', 'string', 30) + + with ensure_clean(self.path) as store: + df_new['string2'] = 'foobarbah' + df_new['string_block1'] = 'foobarbah1' + df_new['string_block2'] = 'foobarbah2' + store.remove('df') + store.append('df', df_new, data_columns=['string', 'string2'], min_itemsize={'string': 30, 'string2': 40, 'values': 50}) + check_col('df', 'string', 30) + check_col('df', 'string2', 40) + check_col('df', 'values_block_1', 50) + + with ensure_clean(self.path) as store: + # multiple data columns + df_new = df.copy() + df_new['string'] = 'foo' + df_new['string'][1:4] = np.nan + df_new['string'][5:6] = 'bar' + df_new['string2'] = 'foo' + df_new['string2'][2:5] = np.nan + df_new['string2'][7:8] = 'bar' + store.remove('df') + store.append( + 'df', df_new, data_columns=['A', 'B', 'string', 'string2']) + result = store.select('df', [Term('string', '=', 'foo'), Term( + 'string2=foo'), Term('A>0'), Term('B<0')]) + expected = df_new[(df_new.string == 'foo') & ( + df_new.string2 == 'foo') & (df_new.A > 0) & (df_new.B < 0)] + tm.assert_frame_equal(result, expected) + + # yield an empty frame + result = store.select('df', [Term('string', '=', 'foo'), Term( + 'string2=bar'), Term('A>0'), Term('B<0')]) + expected = df_new[(df_new.string == 'foo') & ( + df_new.string2 == 'bar') & (df_new.A > 0) & (df_new.B < 0)] + tm.assert_frame_equal(result, expected) + + with ensure_clean(self.path) as store: + # doc example + df_dc = df.copy() + df_dc['string'] = 'foo' + df_dc.ix[4:6, 'string'] = np.nan + df_dc.ix[7:9, 'string'] = 'bar' + df_dc['string2'] = 'cool' + df_dc['datetime'] = Timestamp('20010102') + df_dc = df_dc.convert_objects() + df_dc.ix[3:5, ['A', 'B', 'datetime']] = np.nan + + store.remove('df_dc') + store.append('df_dc', df_dc, data_columns=['B', 'C', + 'string', 'string2', 'datetime']) + result = store.select('df_dc', [Term('B>0')]) + + expected = df_dc[df_dc.B > 0] + tm.assert_frame_equal(result, expected) + + result = store.select( + 'df_dc', ['B > 0', 'C > 0', 'string == foo']) + expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & ( + df_dc.string == 'foo')] + tm.assert_frame_equal(result, expected) + def test_create_table_index(self): - - def col(t,column): - return getattr(self.store.get_storer(t).table.cols,column) - - # index=False - wp = tm.makePanel() - self.store.append('p5', wp, index=False) - self.store.create_table_index('p5', columns=['major_axis']) - assert(col('p5', 'major_axis').is_indexed is True) - assert(col('p5', 
'minor_axis').is_indexed is False) - - # index=True - self.store.append('p5i', wp, index=True) - assert(col('p5i', 'major_axis').is_indexed is True) - assert(col('p5i', 'minor_axis').is_indexed is True) - - # default optlevels - self.store.get_storer('p5').create_index() - assert(col('p5', 'major_axis').index.optlevel == 6) - assert(col('p5', 'minor_axis').index.kind == 'medium') - - # let's change the indexing scheme - self.store.create_table_index('p5') - assert(col('p5', 'major_axis').index.optlevel == 6) - assert(col('p5', 'minor_axis').index.kind == 'medium') - self.store.create_table_index('p5', optlevel=9) - assert(col('p5', 'major_axis').index.optlevel == 9) - assert(col('p5', 'minor_axis').index.kind == 'medium') - self.store.create_table_index('p5', kind='full') - assert(col('p5', 'major_axis').index.optlevel == 9) - assert(col('p5', 'minor_axis').index.kind == 'full') - self.store.create_table_index('p5', optlevel=1, kind='light') - assert(col('p5', 'major_axis').index.optlevel == 1) - assert(col('p5', 'minor_axis').index.kind == 'light') - - # data columns - df = tm.makeTimeDataFrame() - df['string'] = 'foo' - df['string2'] = 'bar' - self.store.append('f', df, data_columns=['string', 'string2']) - assert(col('f', 'index').is_indexed is True) - assert(col('f', 'string').is_indexed is True) - assert(col('f', 'string2').is_indexed is True) - - # specify index=columns - self.store.append( - 'f2', df, index=['string'], data_columns=['string', 'string2']) - assert(col('f2', 'index').is_indexed is False) - assert(col('f2', 'string').is_indexed is True) - assert(col('f2', 'string2').is_indexed is False) - - # try to index a non-table - self.store.remove('f2') - self.store.put('f2', df) - self.assertRaises(Exception, self.store.create_table_index, 'f2') - - # try to change the version supports flag - from pandas.io import pytables - pytables._table_supports_index = False - self.assertRaises(Exception, self.store.create_table_index, 'f') - - # test out some versions - original = tables.__version__ - - for v in ['2.2', '2.2b']: - pytables._table_mod = None - pytables._table_supports_index = False - tables.__version__ = v - self.assertRaises(Exception, self.store.create_table_index, 'f') - - for v in ['2.3.1', '2.3.1b', '2.4dev', '2.4', original]: - pytables._table_mod = None + + with ensure_clean(self.path) as store: + + def col(t,column): + return getattr(store.get_storer(t).table.cols,column) + + # index=False + wp = tm.makePanel() + store.append('p5', wp, index=False) + store.create_table_index('p5', columns=['major_axis']) + assert(col('p5', 'major_axis').is_indexed is True) + assert(col('p5', 'minor_axis').is_indexed is False) + + # index=True + store.append('p5i', wp, index=True) + assert(col('p5i', 'major_axis').is_indexed is True) + assert(col('p5i', 'minor_axis').is_indexed is True) + + # default optlevels + store.get_storer('p5').create_index() + assert(col('p5', 'major_axis').index.optlevel == 6) + assert(col('p5', 'minor_axis').index.kind == 'medium') + + # let's change the indexing scheme + store.create_table_index('p5') + assert(col('p5', 'major_axis').index.optlevel == 6) + assert(col('p5', 'minor_axis').index.kind == 'medium') + store.create_table_index('p5', optlevel=9) + assert(col('p5', 'major_axis').index.optlevel == 9) + assert(col('p5', 'minor_axis').index.kind == 'medium') + store.create_table_index('p5', kind='full') + assert(col('p5', 'major_axis').index.optlevel == 9) + assert(col('p5', 'minor_axis').index.kind == 'full') + store.create_table_index('p5', 
optlevel=1, kind='light') + assert(col('p5', 'major_axis').index.optlevel == 1) + assert(col('p5', 'minor_axis').index.kind == 'light') + + # data columns + df = tm.makeTimeDataFrame() + df['string'] = 'foo' + df['string2'] = 'bar' + store.append('f', df, data_columns=['string', 'string2']) + assert(col('f', 'index').is_indexed is True) + assert(col('f', 'string').is_indexed is True) + assert(col('f', 'string2').is_indexed is True) + + # specify index=columns + store.append( + 'f2', df, index=['string'], data_columns=['string', 'string2']) + assert(col('f2', 'index').is_indexed is False) + assert(col('f2', 'string').is_indexed is True) + assert(col('f2', 'string2').is_indexed is False) + + # try to index a non-table + store.remove('f2') + store.put('f2', df) + self.assertRaises(Exception, store.create_table_index, 'f2') + + # try to change the version supports flag + from pandas.io import pytables pytables._table_supports_index = False - tables.__version__ = v - self.store.create_table_index('f') - pytables._table_mod = None - pytables._table_supports_index = False - tables.__version__ = original + self.assertRaises(Exception, store.create_table_index, 'f') + + # test out some versions + original = tables.__version__ + + for v in ['2.2', '2.2b']: + pytables._table_mod = None + pytables._table_supports_index = False + tables.__version__ = v + self.assertRaises(Exception, store.create_table_index, 'f') + + for v in ['2.3.1', '2.3.1b', '2.4dev', '2.4', original]: + pytables._table_mod = None + pytables._table_supports_index = False + tables.__version__ = v + store.create_table_index('f') + pytables._table_mod = None + pytables._table_supports_index = False + tables.__version__ = original def test_big_table_frame(self): raise nose.SkipTest('no big table frame') @@ -715,14 +787,10 @@ def test_big_table_frame(self): import time x = time.time() - try: - store = HDFStore(self.scratchpath) + with ensure_clean(self.path,mode='w') as store: store.append('df', df) rows = store.root.df.table.nrows recons = store.select('df') - finally: - store.close() - os.remove(self.scratchpath) print "\nbig_table frame [%s] -> %5.2f" % (rows, time.time() - x) @@ -743,25 +811,18 @@ def test_big_table2_frame(self): df['datetime%03d' % x] = datetime.datetime(2001, 1, 2, 0, 0) print "\nbig_table2 frame (creation of df) [rows->%s] -> %5.2f" % (len(df.index), time.time() - start_time) - fn = 'big_table2.h5' - - try: - def f(chunksize): - store = HDFStore(fn, mode='w') + def f(chunksize): + with ensure_clean(self.path,mode='w') as store: store.append('df', df, chunksize=chunksize) r = store.root.df.table.nrows - store.close() return r - for c in [10000, 50000, 250000]: - start_time = time.time() - print "big_table2 frame [chunk->%s]" % c - rows = f(c) - print "big_table2 frame [rows->%s,chunk->%s] -> %5.2f" % (rows, c, time.time() - start_time) - - finally: - os.remove(fn) + for c in [10000, 50000, 250000]: + start_time = time.time() + print "big_table2 frame [chunk->%s]" % c + rows = f(c) + print "big_table2 frame [rows->%s,chunk->%s] -> %5.2f" % (rows, c, time.time() - start_time) def test_big_put_frame(self): raise nose.SkipTest('no big put frame') @@ -777,21 +838,15 @@ def test_big_put_frame(self): df['datetime%03d' % x] = datetime.datetime(2001, 1, 2, 0, 0) print "\nbig_put frame (creation of df) [rows->%s] -> %5.2f" % (len(df.index), time.time() - start_time) - fn = 'big_put.h5' - - try: + with ensure_clean(self.path, mode='w') as store: start_time = time.time() store = HDFStore(fn, mode='w') store.put('df', df) - 
    def test_big_table_frame(self):
        raise nose.SkipTest('no big table frame')

@@ -715,14 +787,10 @@ def test_big_table_frame(self):
        import time
        x = time.time()
-        try:
-            store = HDFStore(self.scratchpath)
+        with ensure_clean(self.path,mode='w') as store:
            store.append('df', df)
            rows = store.root.df.table.nrows
            recons = store.select('df')
-        finally:
-            store.close()
-            os.remove(self.scratchpath)

        print "\nbig_table frame [%s] -> %5.2f" % (rows, time.time() - x)

@@ -743,25 +811,18 @@ def test_big_table2_frame(self):
            df['datetime%03d' % x] = datetime.datetime(2001, 1, 2, 0, 0)

        print "\nbig_table2 frame (creation of df) [rows->%s] -> %5.2f" % (len(df.index), time.time() - start_time)
-        fn = 'big_table2.h5'
-
-        try:
-            def f(chunksize):
-                store = HDFStore(fn, mode='w')
+        def f(chunksize):
+            with ensure_clean(self.path,mode='w') as store:
                store.append('df', df, chunksize=chunksize)
                r = store.root.df.table.nrows
-                store.close()
                return r

-            for c in [10000, 50000, 250000]:
-                start_time = time.time()
-                print "big_table2 frame [chunk->%s]" % c
-                rows = f(c)
-                print "big_table2 frame [rows->%s,chunk->%s] -> %5.2f" % (rows, c, time.time() - start_time)
-
-        finally:
-            os.remove(fn)
+        for c in [10000, 50000, 250000]:
+            start_time = time.time()
+            print "big_table2 frame [chunk->%s]" % c
+            rows = f(c)
+            print "big_table2 frame [rows->%s,chunk->%s] -> %5.2f" % (rows, c, time.time() - start_time)

    def test_big_put_frame(self):
        raise nose.SkipTest('no big put frame')

@@ -777,21 +838,15 @@ def test_big_put_frame(self):
            df['datetime%03d' % x] = datetime.datetime(2001, 1, 2, 0, 0)

        print "\nbig_put frame (creation of df) [rows->%s] -> %5.2f" % (len(df.index), time.time() - start_time)
-        fn = 'big_put.h5'
-
-        try:
+        with ensure_clean(self.path, mode='w') as store:
            start_time = time.time()
-            store = HDFStore(fn, mode='w')
            store.put('df', df)
-            store.close()

            print df.get_dtype_counts()
            print "big_put frame [shape->%s] -> %5.2f" % (df.shape, time.time() - start_time)

-        finally:
-            os.remove(fn)
-
    def test_big_table_panel(self):
        raise nose.SkipTest('no big table panel')

@@ -807,27 +862,25 @@ def test_big_table_panel(self):
        import time
        x = time.time()
-        try:
-            store = HDFStore(self.scratchpath)
+
+
+        with ensure_clean(self.path, mode='w') as store:
            store.append('wp', wp)
            rows = store.root.wp.table.nrows
            recons = store.select('wp')
-        finally:
-            store.close()
-            os.remove(self.scratchpath)

        print "\nbig_table panel [%s] -> %5.2f" % (rows, time.time() - x)

    def test_append_diff_item_order(self):
-        raise nose.SkipTest('append diff item order')

        wp = tm.makePanel()
        wp1 = wp.ix[:, :10, :]
        wp2 = wp.ix[['ItemC', 'ItemB', 'ItemA'], 10:, :]
-
-        self.store.put('panel', wp1, table=True)
-        self.assertRaises(Exception, self.store.put, 'panel', wp2,
-                          append=True)
+
+        with ensure_clean(self.path) as store:
+            store.put('panel', wp1, table=True)
+            self.assertRaises(Exception, store.put, 'panel', wp2,
+                              append=True)

    def test_append_hierarchical(self):
        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
@@ -838,75 +891,81 @@
        df = DataFrame(np.random.randn(10, 3), index=index,
                       columns=['A', 'B', 'C'])

-        self.store.append('mi', df)
-        result = self.store.select('mi')
-        tm.assert_frame_equal(result, df)
+        with ensure_clean(self.path) as store:
+            store.append('mi', df)
+            result = store.select('mi')
+            tm.assert_frame_equal(result, df)

    def test_append_misc(self):

-        # unsuported data types for non-tables
-        p4d = tm.makePanel4D()
-        self.assertRaises(Exception, self.store.put,'p4d',p4d)
+        with ensure_clean(self.path) as store:

-        # unsupported data type for table
-        s = tm.makeStringSeries()
-        self.assertRaises(Exception, self.store.append,'s',s)
+            # unsupported data types for non-tables
+            p4d = tm.makePanel4D()
+            self.assertRaises(Exception, store.put,'p4d',p4d)

-        # unsuported data types
-        self.assertRaises(Exception, self.store.put,'abc',None)
-        self.assertRaises(Exception, self.store.put,'abc','123')
-        self.assertRaises(Exception, self.store.put,'abc',123)
-        self.assertRaises(Exception, self.store.put,'abc',np.arange(5))
+            # unsupported data type for table
+            s = tm.makeStringSeries()
+            self.assertRaises(Exception, store.append,'s',s)

-        df = tm.makeDataFrame()
-        self.store.append('df', df, chunksize=1)
-        result = self.store.select('df')
-        tm.assert_frame_equal(result, df)
+            # unsupported data types
+            self.assertRaises(Exception, store.put,'abc',None)
+            self.assertRaises(Exception, store.put,'abc','123')
+            self.assertRaises(Exception, store.put,'abc',123)
+            self.assertRaises(Exception, store.put,'abc',np.arange(5))

-        self.store.append('df1', df, expectedrows=10)
-        result = self.store.select('df1')
-        tm.assert_frame_equal(result, df)
+            df = tm.makeDataFrame()
+            store.append('df', df, chunksize=1)
+            result = store.select('df')
+            tm.assert_frame_equal(result, df)
+
+            store.append('df1', df, expectedrows=10)
+            result = store.select('df1')
+            tm.assert_frame_equal(result, df)

    def test_table_index_incompatible_dtypes(self):
        df1 = DataFrame({'a': [1, 2, 3]})
        df2 = DataFrame({'a': [4, 5, 6]},
                        index=date_range('1/1/2000', periods=3))

-        self.store.put('frame', df1, table=True)
-        self.assertRaises(Exception, self.store.put, 'frame', df2,
-                          table=True, append=True)
+        with ensure_clean(self.path) as store:
+            store.put('frame', df1, table=True)
+            self.assertRaises(Exception, store.put, 'frame', df2,
+                              table=True, append=True)

    def test_table_values_dtypes_roundtrip(self):
-        df1 = DataFrame({'a': [1, 2, 3]}, dtype='f8')
-        self.store.append('df_f8', df1)
-        assert df1.dtypes == self.store['df_f8'].dtypes
-
-        df2 = DataFrame({'a': [1, 2, 3]}, dtype='i8')
-        self.store.append('df_i8', df2)
-        assert df2.dtypes == self.store['df_i8'].dtypes
-
-        # incompatible dtype
-        self.assertRaises(Exception, self.store.append, 'df_i8', df1)
-
-        # check creation/storage/retrieval of float32 (a bit hacky to actually create them thought)
-        df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['A'])
-        self.store.append('df_f4', df1)
-        assert df1.dtypes == self.store['df_f4'].dtypes
-        assert df1.dtypes[0] == 'float32'
-
-        # check with mixed dtypes (but not multi float types)
-        df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['float32'])
-        df1['string'] = 'foo'
-        self.store.append('df_mixed_dtypes1', df1)
-        assert (df1.dtypes == self.store['df_mixed_dtypes1'].dtypes).all() == True
-        assert df1.dtypes[0] == 'float32'
-        assert df1.dtypes[1] == 'object'
-
-        ### this is not supported, e.g. mixed float32/float64 blocks ###
-        #df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['float32'])
-        #df1['float64'] = 1.0
-        #self.store.append('df_mixed_dtypes2', df1)
-        #assert df1.dtypes == self.store['df_mixed_dtypes2'].dtypes).all() == True
+
+        with ensure_clean(self.path) as store:
+            df1 = DataFrame({'a': [1, 2, 3]}, dtype='f8')
+            store.append('df_f8', df1)
+            assert df1.dtypes == store['df_f8'].dtypes
+
+            df2 = DataFrame({'a': [1, 2, 3]}, dtype='i8')
+            store.append('df_i8', df2)
+            assert df2.dtypes == store['df_i8'].dtypes
+
+            # incompatible dtype
+            self.assertRaises(Exception, store.append, 'df_i8', df1)
+
+            # check creation/storage/retrieval of float32 (a bit hacky to actually create them though)
+            df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['A'])
+            store.append('df_f4', df1)
+            assert df1.dtypes == store['df_f4'].dtypes
+            assert df1.dtypes[0] == 'float32'
+
+            # check with mixed dtypes (but not multi float types)
+            df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['float32'])
+            df1['string'] = 'foo'
+            store.append('df_mixed_dtypes1', df1)
+            assert (df1.dtypes == store['df_mixed_dtypes1'].dtypes).all() == True
+            assert df1.dtypes[0] == 'float32'
+            assert df1.dtypes[1] == 'object'
+
+            ### this is not supported, e.g. mixed float32/float64 blocks ###
+            #df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['float32'])
+            #df1['float64'] = 1.0
+            #store.append('df_mixed_dtypes2', df1)
+            #assert (df1.dtypes == store['df_mixed_dtypes2'].dtypes).all() == True

    def test_table_mixed_dtypes(self):
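Note for readers landing mid-diff: all of the rewritten tests acquire their store through ``ensure_clean`` instead of managing ``self.store`` and a scratch path by hand. Its definition is added earlier in this patch; it is presumably a small context manager along these lines (an editorial sketch, not the patch's actual code)::

    import os
    from contextlib import contextmanager

    from pandas.io.pytables import HDFStore

    @contextmanager
    def ensure_clean(path, mode='a', **kwargs):
        # hand an open store to the test body, then guarantee the
        # handle is closed and the scratch file removed afterwards
        store = HDFStore(path, mode=mode, **kwargs)
        try:
            yield store
        finally:
            store.close()
            if os.path.exists(path):
                os.remove(path)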
@@ -926,8 +985,9 @@ def test_table_mixed_dtypes(self):
        df.ix[3:6, ['obj1']] = np.nan
        df = df.consolidate().convert_objects()

-        self.store.append('df1_mixed', df)
-        tm.assert_frame_equal(self.store.select('df1_mixed'), df)
+        with ensure_clean(self.path) as store:
+            store.append('df1_mixed', df)
+            tm.assert_frame_equal(store.select('df1_mixed'), df)

        # panel
        wp = tm.makePanel()
@@ -939,8 +999,9 @@ def test_table_mixed_dtypes(self):
        wp['int2'] = 2
        wp = wp.consolidate()

-        self.store.append('p1_mixed', wp)
-        tm.assert_panel_equal(self.store.select('p1_mixed'), wp)
+        with ensure_clean(self.path) as store:
+            store.append('p1_mixed', wp)
+            tm.assert_panel_equal(store.select('p1_mixed'), wp)

        # ndim
        wp = tm.makePanel4D()
@@ -952,16 +1013,20 @@ def test_table_mixed_dtypes(self):
        wp['int2'] = 2
        wp = wp.consolidate()

-        self.store.append('p4d_mixed', wp)
-        tm.assert_panel4d_equal(self.store.select('p4d_mixed'), wp)
+        with ensure_clean(self.path) as store:
+            store.append('p4d_mixed', wp)
+            tm.assert_panel4d_equal(store.select('p4d_mixed'), wp)

    def test_unimplemented_dtypes_table_columns(self):
-        #### currently not supported dtypes ####
-        for n, f in [('unicode', u'\u03c3'), ('date', datetime.date(2001, 1, 2))]:
-            df = tm.makeDataFrame()
-            df[n] = f
-            self.assertRaises(
-                NotImplementedError, self.store.append, 'df1_%s' % n, df)
+
+        with ensure_clean(self.path) as store:
+
+            #### currently not supported dtypes ####
+            for n, f in [('unicode', u'\u03c3'), ('date', datetime.date(2001, 1, 2))]:
+                df = tm.makeDataFrame()
+                df[n] = f
+                self.assertRaises(
+                    NotImplementedError, store.append, 'df1_%s' % n, df)

        # frame
        df = tm.makeDataFrame()
@@ -970,271 +1035,288 @@ def test_unimplemented_dtypes_table_columns(self):
        df['datetime1'] = datetime.date(2001, 1, 2)
        df = df.consolidate().convert_objects()

-        # this fails because we have a date in the object block......
-        self.assertRaises(Exception, self.store.append, 'df_unimplemented', df)
+        with ensure_clean(self.path) as store:
+            # this fails because we have a date in the object block......
+            self.assertRaises(Exception, store.append, 'df_unimplemented', df)

    def test_remove(self):
-        ts = tm.makeTimeSeries()
-        df = tm.makeDataFrame()
-        self.store['a'] = ts
-        self.store['b'] = df
-        self.store.remove('a')
-        self.assertEquals(len(self.store), 1)
-        tm.assert_frame_equal(df, self.store['b'])
-
-        self.store.remove('b')
-        self.assertEquals(len(self.store), 0)
-
-        # pathing
-        self.store['a'] = ts
-        self.store['b/foo'] = df
-        self.store.remove('foo')
-        self.store.remove('b/foo')
-        self.assertEquals(len(self.store), 1)
-
-        self.store['a'] = ts
-        self.store['b/foo'] = df
-        self.store.remove('b')
-        self.assertEquals(len(self.store), 1)
-
-        # __delitem__
-        self.store['a'] = ts
-        self.store['b'] = df
-        del self.store['a']
-        del self.store['b']
-        self.assertEquals(len(self.store), 0)
-
-    def test_remove_where(self):
+        with ensure_clean(self.path) as store:
-        # non-existance
-        crit1 = Term('index', '>', 'foo')
-        self.store.remove('a', where=[crit1])
+            ts = tm.makeTimeSeries()
+            df = tm.makeDataFrame()
+            store['a'] = ts
+            store['b'] = df
+            store.remove('a')
+            self.assertEquals(len(store), 1)
+            tm.assert_frame_equal(df, store['b'])
+
+            store.remove('b')
+            self.assertEquals(len(store), 0)
+
+            # pathing
+            store['a'] = ts
+            store['b/foo'] = df
+            store.remove('foo')
+            store.remove('b/foo')
+            self.assertEquals(len(store), 1)
+
+            store['a'] = ts
+            store['b/foo'] = df
+            store.remove('b')
+            self.assertEquals(len(store), 1)
+
+            # __delitem__
+            store['a'] = ts
+            store['b'] = df
+            del store['a']
+            del store['b']
+            self.assertEquals(len(store), 0)

-        # try to remove non-table (with crit)
-        # non-table ok (where = None)
-        wp = tm.makePanel()
-        self.store.put('wp', wp, table=True)
-        self.store.remove('wp', [('minor_axis', ['A', 'D'])])
-        rs = self.store.select('wp')
-        expected = wp.reindex(minor_axis=['B', 'C'])
-        tm.assert_panel_equal(rs, expected)
-
-        # empty where
-        self.store.remove('wp')
-        self.store.put('wp', wp, table=True)
-
-        # deleted number (entire table)
-        n = self.store.remove('wp', [])
-        assert(n == 120)
-
-        # non - empty where
-        self.store.remove('wp')
-        self.store.put('wp', wp, table=True)
-        self.assertRaises(Exception, self.store.remove,
-                          'wp', ['foo'])
-
-        # selectin non-table with a where
-        # self.store.put('wp2', wp, table=False)
-        # self.assertRaises(Exception, self.store.remove,
-        #                   'wp2', [('column', ['A', 'D'])])
+    def test_remove_where(self):
+
+        with ensure_clean(self.path) as store:
+
+            # non-existence
+            crit1 = Term('index', '>', 'foo')
+            store.remove('a', where=[crit1])
+
+            # try to remove non-table (with crit)
+            # non-table ok (where = None)
+            wp = tm.makePanel()
+            store.put('wp', wp, table=True)
+            store.remove('wp', [('minor_axis', ['A', 'D'])])
+            rs = store.select('wp')
+            expected = wp.reindex(minor_axis=['B', 'C'])
+            tm.assert_panel_equal(rs, expected)
+
+            # empty where
+            store.remove('wp')
+            store.put('wp', wp, table=True)
+
+            # deleted number (entire table)
+            n = store.remove('wp', [])
+            assert(n == 120)
+
+            # non - empty where
+            store.remove('wp')
+            store.put('wp', wp, table=True)
+            self.assertRaises(Exception, store.remove,
+                              'wp', ['foo'])
+
+            # selecting non-table with a where
+            # store.put('wp2', wp, table=False)
+            # self.assertRaises(Exception, store.remove,
+            #                   'wp2', [('column', ['A', 'D'])])
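A detail the deletion tests lean on: ``remove`` with a ``where`` clause deletes only the matching rows of a table and returns the number of rows removed (an empty clause deletes, and counts, the entire table). A condensed usage sketch (editorial; the path is illustrative)::

    import pandas.util.testing as tm
    from pandas.io.pytables import HDFStore, Term

    wp = tm.makePanel()
    store = HDFStore('tmp_remove.h5', mode='w')   # illustrative scratch file
    store.put('wp', wp, table=True)
    # delete rows whose minor_axis label is 'A' or 'D';
    # n is the count of removed table rows
    n = store.remove('wp', where=[Term('minor_axis', ['A', 'D'])])
    store.close()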
    def test_remove_crit(self):
-        wp = tm.makePanel()
-        # group row removal
-        date4 = wp.major_axis.take([0, 1, 2, 4, 5, 6, 8, 9, 10])
-        crit4 = Term('major_axis', date4)
-        self.store.put('wp3', wp, table=True)
-        n = self.store.remove('wp3', where=[crit4])
-        assert(n == 36)
-        result = self.store.select('wp3')
-        expected = wp.reindex(major_axis=wp.major_axis - date4)
-        tm.assert_panel_equal(result, expected)
-
-        # upper half
-        self.store.put('wp', wp, table=True)
-        date = wp.major_axis[len(wp.major_axis) // 2]
-
-        crit1 = Term('major_axis', '>', date)
-        crit2 = Term('minor_axis', ['A', 'D'])
-        n = self.store.remove('wp', where=[crit1])
-
-        assert(n == 56)
-
-        n = self.store.remove('wp', where=[crit2])
-        assert(n == 32)
-
-        result = self.store['wp']
-        expected = wp.truncate(after=date).reindex(minor=['B', 'C'])
-        tm.assert_panel_equal(result, expected)
-
-        # individual row elements
-        self.store.put('wp2', wp, table=True)
-
-        date1 = wp.major_axis[1:3]
-        crit1 = Term('major_axis', date1)
-        self.store.remove('wp2', where=[crit1])
-        result = self.store.select('wp2')
-        expected = wp.reindex(major_axis=wp.major_axis - date1)
-        tm.assert_panel_equal(result, expected)
-
-        date2 = wp.major_axis[5]
-        crit2 = Term('major_axis', date2)
-        self.store.remove('wp2', where=[crit2])
-        result = self.store['wp2']
-        expected = wp.reindex(
-            major_axis=wp.major_axis - date1 - Index([date2]))
-        tm.assert_panel_equal(result, expected)
-
-        date3 = [wp.major_axis[7], wp.major_axis[9]]
-        crit3 = Term('major_axis', date3)
-        self.store.remove('wp2', where=[crit3])
-        result = self.store['wp2']
-        expected = wp.reindex(
-            major_axis=wp.major_axis - date1 - Index([date2]) - Index(date3))
-        tm.assert_panel_equal(result, expected)
-
-        # corners
-        self.store.put('wp4', wp, table=True)
-        n = self.store.remove(
-            'wp4', where=[Term('major_axis', '>', wp.major_axis[-1])])
-        result = self.store.select('wp4')
-        tm.assert_panel_equal(result, wp)
+        with ensure_clean(self.path) as store:
+
+            wp = tm.makePanel()
+
+            # group row removal
+            date4 = wp.major_axis.take([0, 1, 2, 4, 5, 6, 8, 9, 10])
+            crit4 = Term('major_axis', date4)
+            store.put('wp3', wp, table=True)
+            n = store.remove('wp3', where=[crit4])
+            assert(n == 36)
+            result = store.select('wp3')
+            expected = wp.reindex(major_axis=wp.major_axis - date4)
+            tm.assert_panel_equal(result, expected)
+
+            # upper half
+            store.put('wp', wp, table=True)
+            date = wp.major_axis[len(wp.major_axis) // 2]
+
+            crit1 = Term('major_axis', '>', date)
+            crit2 = Term('minor_axis', ['A', 'D'])
+            n = store.remove('wp', where=[crit1])
+
+            assert(n == 56)
+
+            n = store.remove('wp', where=[crit2])
+            assert(n == 32)
+
+            result = store['wp']
+            expected = wp.truncate(after=date).reindex(minor=['B', 'C'])
+            tm.assert_panel_equal(result, expected)
+
+            # individual row elements
+            store.put('wp2', wp, table=True)
+
+            date1 = wp.major_axis[1:3]
+            crit1 = Term('major_axis', date1)
+            store.remove('wp2', where=[crit1])
+            result = store.select('wp2')
+            expected = wp.reindex(major_axis=wp.major_axis - date1)
+            tm.assert_panel_equal(result, expected)
+
+            date2 = wp.major_axis[5]
+            crit2 = Term('major_axis', date2)
+            store.remove('wp2', where=[crit2])
+            result = store['wp2']
+            expected = wp.reindex(
+                major_axis=wp.major_axis - date1 - Index([date2]))
+            tm.assert_panel_equal(result, expected)
+
+            date3 = [wp.major_axis[7], wp.major_axis[9]]
+            crit3 = Term('major_axis', date3)
+            store.remove('wp2', where=[crit3])
+            result = store['wp2']
+            expected = wp.reindex(
+                major_axis=wp.major_axis - date1 - Index([date2]) - Index(date3))
+            tm.assert_panel_equal(result, expected)
+
+            # corners
+            store.put('wp4', wp, table=True)
+            n = store.remove(
+                'wp4', where=[Term('major_axis', '>', wp.major_axis[-1])])
+            result = store.select('wp4')
+            tm.assert_panel_equal(result, wp)

    def test_terms(self):
-        wp = 
tm.makePanel() - p4d = tm.makePanel4D() - self.store.put('wp', wp, table=True) - self.store.put('p4d', p4d, table=True) - - # some invalid terms - terms = [ - ['minor', ['A', 'B']], - ['index', ['20121114']], - ['index', ['20121114', '20121114']], - ] - for t in terms: - self.assertRaises(Exception, self.store.select, 'wp', t) - - self.assertRaises(Exception, Term.__init__) - self.assertRaises(Exception, Term.__init__, 'blah') - self.assertRaises(Exception, Term.__init__, 'index') - self.assertRaises(Exception, Term.__init__, 'index', '==') - self.assertRaises(Exception, Term.__init__, 'index', '>', 5) - - # panel - result = self.store.select('wp', [Term( - 'major_axis<20000108'), Term('minor_axis', '=', ['A', 'B'])]) - expected = wp.truncate(after='20000108').reindex(minor=['A', 'B']) - tm.assert_panel_equal(result, expected) - - # p4d - result = self.store.select('p4d', [Term('major_axis<20000108'), - Term('minor_axis', '=', ['A', 'B']), - Term('items', '=', ['ItemA', 'ItemB'])]) - expected = p4d.truncate(after='20000108').reindex( - minor=['A', 'B'], items=['ItemA', 'ItemB']) - tm.assert_panel4d_equal(result, expected) - - # valid terms - terms = [ - dict(field='major_axis', op='>', value='20121114'), - ('major_axis', '20121114'), - ('major_axis', '>', '20121114'), - (('major_axis', ['20121114', '20121114']),), - ('major_axis', datetime.datetime(2012, 11, 14)), - 'major_axis> 20121114', - 'major_axis >20121114', - 'major_axis > 20121114', - (('minor_axis', ['A', 'B']),), - (('minor_axis', ['A', 'B']),), - ((('minor_axis', ['A', 'B']),),), - (('items', ['ItemA', 'ItemB']),), - ('items=ItemA'), - ] - - for t in terms: - self.store.select('wp', t) - self.store.select('p4d', t) - - # valid for p4d only - terms = [ - (('labels', '=', ['l1', 'l2']),), - Term('labels', '=', ['l1', 'l2']), - ] - - for t in terms: - self.store.select('p4d', t) + with ensure_clean(self.path) as store: + + wp = tm.makePanel() + p4d = tm.makePanel4D() + store.put('wp', wp, table=True) + store.put('p4d', p4d, table=True) + + # some invalid terms + terms = [ + ['minor', ['A', 'B']], + ['index', ['20121114']], + ['index', ['20121114', '20121114']], + ] + for t in terms: + self.assertRaises(Exception, store.select, 'wp', t) + + self.assertRaises(Exception, Term.__init__) + self.assertRaises(Exception, Term.__init__, 'blah') + self.assertRaises(Exception, Term.__init__, 'index') + self.assertRaises(Exception, Term.__init__, 'index', '==') + self.assertRaises(Exception, Term.__init__, 'index', '>', 5) + + # panel + result = store.select('wp', [Term( + 'major_axis<20000108'), Term('minor_axis', '=', ['A', 'B'])]) + expected = wp.truncate(after='20000108').reindex(minor=['A', 'B']) + tm.assert_panel_equal(result, expected) + + # p4d + result = store.select('p4d', [Term('major_axis<20000108'), + Term('minor_axis', '=', ['A', 'B']), + Term('items', '=', ['ItemA', 'ItemB'])]) + expected = p4d.truncate(after='20000108').reindex( + minor=['A', 'B'], items=['ItemA', 'ItemB']) + tm.assert_panel4d_equal(result, expected) + + # valid terms + terms = [ + dict(field='major_axis', op='>', value='20121114'), + ('major_axis', '20121114'), + ('major_axis', '>', '20121114'), + (('major_axis', ['20121114', '20121114']),), + ('major_axis', datetime.datetime(2012, 11, 14)), + 'major_axis> 20121114', + 'major_axis >20121114', + 'major_axis > 20121114', + (('minor_axis', ['A', 'B']),), + (('minor_axis', ['A', 'B']),), + ((('minor_axis', ['A', 'B']),),), + (('items', ['ItemA', 'ItemB']),), + ('items=ItemA'), + ] + + for t in terms: + 
store.select('wp', t) + store.select('p4d', t) + + # valid for p4d only + terms = [ + (('labels', '=', ['l1', 'l2']),), + Term('labels', '=', ['l1', 'l2']), + ] + + for t in terms: + store.select('p4d', t) def test_series(self): + s = tm.makeStringSeries() self._check_roundtrip(s, tm.assert_series_equal) - + ts = tm.makeTimeSeries() self._check_roundtrip(ts, tm.assert_series_equal) - + ts2 = Series(ts.index, Index(ts.index, dtype=object)) self._check_roundtrip(ts2, tm.assert_series_equal) - + ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object), dtype=object)) self._check_roundtrip(ts3, tm.assert_series_equal) - + def test_sparse_series(self): + s = tm.makeStringSeries() s[3:5] = np.nan ss = s.to_sparse() self._check_roundtrip(ss, tm.assert_series_equal, check_series_type=True) - + ss2 = s.to_sparse(kind='integer') self._check_roundtrip(ss2, tm.assert_series_equal, check_series_type=True) - + ss3 = s.to_sparse(fill_value=0) self._check_roundtrip(ss3, tm.assert_series_equal, check_series_type=True) def test_sparse_frame(self): + s = tm.makeDataFrame() s.ix[3:5, 1:3] = np.nan s.ix[8:10, -2] = np.nan ss = s.to_sparse() self._check_double_roundtrip(ss, tm.assert_frame_equal, check_frame_type=True) - + ss2 = s.to_sparse(kind='integer') self._check_double_roundtrip(ss2, tm.assert_frame_equal, check_frame_type=True) - + ss3 = s.to_sparse(fill_value=0) self._check_double_roundtrip(ss3, tm.assert_frame_equal, check_frame_type=True) - + def test_sparse_panel(self): + items = ['x', 'y', 'z'] p = Panel(dict((i, tm.makeDataFrame().ix[:2, :2]) for i in items)) sp = p.to_sparse() - + self._check_double_roundtrip(sp, tm.assert_panel_equal, check_panel_type=True) - + sp2 = p.to_sparse(kind='integer') self._check_double_roundtrip(sp2, tm.assert_panel_equal, check_panel_type=True) - + sp3 = p.to_sparse(fill_value=0) self._check_double_roundtrip(sp3, tm.assert_panel_equal, check_panel_type=True) def test_float_index(self): + # GH #454 index = np.random.randn(10) s = Series(np.random.randn(10), index=index) self._check_roundtrip(s, tm.assert_series_equal) - + def test_tuple_index(self): + # GH #492 col = np.arange(10) idx = [(0., 1.), (2., 3.), (4., 5.)] @@ -1243,8 +1325,9 @@ def test_tuple_index(self): warnings.filterwarnings('ignore', category=PerformanceWarning) self._check_roundtrip(DF, tm.assert_frame_equal) warnings.filterwarnings('always', category=PerformanceWarning) - + def test_index_types(self): + values = np.random.randn(2) func = lambda l, r: tm.assert_series_equal(l, r, True, True, True) @@ -1253,45 +1336,47 @@ def test_index_types(self): ser = Series(values, [0, 'y']) self._check_roundtrip(ser, func) warnings.filterwarnings('always', category=PerformanceWarning) - + ser = Series(values, [datetime.datetime.today(), 0]) self._check_roundtrip(ser, func) - + ser = Series(values, ['y', 0]) self._check_roundtrip(ser, func) - + warnings.filterwarnings('ignore', category=PerformanceWarning) ser = Series(values, [datetime.date.today(), 'a']) self._check_roundtrip(ser, func) warnings.filterwarnings('always', category=PerformanceWarning) - + warnings.filterwarnings('ignore', category=PerformanceWarning) ser = Series(values, [1.23, 'b']) self._check_roundtrip(ser, func) warnings.filterwarnings('always', category=PerformanceWarning) - + ser = Series(values, [1, 1.53]) self._check_roundtrip(ser, func) - + ser = Series(values, [1, 5]) self._check_roundtrip(ser, func) - + ser = Series(values, [datetime.datetime( - 2012, 1, 1), datetime.datetime(2012, 1, 2)]) + 2012, 1, 1), datetime.datetime(2012, 1, 
2)]) self._check_roundtrip(ser, func) - + def test_timeseries_preepoch(self): + if sys.version_info[0] == 2 and sys.version_info[1] < 7: raise nose.SkipTest - + dr = bdate_range('1/1/1940', '1/1/1960') ts = Series(np.random.randn(len(dr)), index=dr) try: self._check_roundtrip(ts, tm.assert_series_equal) except OverflowError: raise nose.SkipTest('known failer on some windows platforms') - + def test_frame(self): + df = tm.makeDataFrame() # put in some random NAs @@ -1300,23 +1385,24 @@ def test_frame(self): self._check_roundtrip_table(df, tm.assert_frame_equal) self._check_roundtrip(df, tm.assert_frame_equal) - + self._check_roundtrip_table(df, tm.assert_frame_equal, compression=True) self._check_roundtrip(df, tm.assert_frame_equal, compression=True) - + tdf = tm.makeTimeDataFrame() self._check_roundtrip(tdf, tm.assert_frame_equal) self._check_roundtrip(tdf, tm.assert_frame_equal, compression=True) - - # not consolidated - df['foo'] = np.random.randn(len(df)) - self.store['df'] = df - recons = self.store['df'] - self.assert_(recons._data.is_consolidated()) - + + with ensure_clean(self.path) as store: + # not consolidated + df['foo'] = np.random.randn(len(df)) + store['df'] = df + recons = store['df'] + self.assert_(recons._data.is_consolidated()) + # empty self._check_roundtrip(df[:0], tm.assert_frame_equal) @@ -1332,37 +1418,33 @@ def test_empty_series_frame(self): self._check_roundtrip(df0, tm.assert_frame_equal) self._check_roundtrip(df1, tm.assert_frame_equal) self._check_roundtrip(df2, tm.assert_frame_equal) - + def test_can_serialize_dates(self): + rng = [x.date() for x in bdate_range('1/1/2000', '1/30/2000')] frame = DataFrame(np.random.randn(len(rng), 4), index=rng) + self._check_roundtrip(frame, tm.assert_frame_equal) def test_timezones(self): rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern') frame = DataFrame(np.random.randn(len(rng), 4), index=rng) - try: - store = HDFStore(self.scratchpath) + + with ensure_clean(self.path) as store: store['frame'] = frame recons = store['frame'] self.assert_(recons.index.equals(rng)) self.assertEquals(rng.tz, recons.index.tz) - finally: - store.close() - os.remove(self.scratchpath) def test_fixed_offset_tz(self): rng = date_range('1/1/2000 00:00:00-07:00', '1/30/2000 00:00:00-07:00') frame = DataFrame(np.random.randn(len(rng), 4), index=rng) - try: - store = HDFStore(self.scratchpath) + + with ensure_clean(self.path) as store: store['frame'] = frame recons = store['frame'] self.assert_(recons.index.equals(rng)) self.assertEquals(rng.tz, recons.index.tz) - finally: - store.close() - os.remove(self.scratchpath) def test_store_hierarchical(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], @@ -1378,41 +1460,31 @@ def test_store_hierarchical(self): self._check_roundtrip(frame['A'], tm.assert_series_equal) # check that the names are stored - try: - store = HDFStore(self.scratchpath) + with ensure_clean(self.path) as store: store['frame'] = frame recons = store['frame'] assert(recons.index.names == ['foo', 'bar']) - finally: - store.close() - os.remove(self.scratchpath) def test_store_index_name(self): df = tm.makeDataFrame() df.index.name = 'foo' - try: - store = HDFStore(self.scratchpath) + + with ensure_clean(self.path) as store: store['frame'] = df recons = store['frame'] assert(recons.index.name == 'foo') - finally: - store.close() - os.remove(self.scratchpath) def test_store_series_name(self): df = tm.makeDataFrame() series = df['A'] - try: - store = HDFStore(self.scratchpath) + with ensure_clean(self.path) as store: 
store['series'] = series recons = store['series'] assert(recons.name == 'A') - finally: - store.close() - os.remove(self.scratchpath) def test_store_mixed(self): + def _make_one(): df = tm.makeDataFrame() df['obj1'] = 'foo' @@ -1429,16 +1501,17 @@ def _make_one(): self._check_roundtrip(df1, tm.assert_frame_equal) self._check_roundtrip(df2, tm.assert_frame_equal) - self.store['obj'] = df1 - tm.assert_frame_equal(self.store['obj'], df1) - self.store['obj'] = df2 - tm.assert_frame_equal(self.store['obj'], df2) - + with ensure_clean(self.path) as store: + store['obj'] = df1 + tm.assert_frame_equal(store['obj'], df1) + store['obj'] = df2 + tm.assert_frame_equal(store['obj'], df2) + # check that can store Series of all of these types self._check_roundtrip(df1['obj1'], tm.assert_series_equal) self._check_roundtrip(df1['bool1'], tm.assert_series_equal) self._check_roundtrip(df1['int1'], tm.assert_series_equal) - + # try with compression self._check_roundtrip(df1['obj1'], tm.assert_series_equal, compression=True) @@ -1450,25 +1523,23 @@ def _make_one(): compression=True) def test_wide(self): + wp = tm.makePanel() self._check_roundtrip(wp, tm.assert_panel_equal) def test_wide_table(self): + wp = tm.makePanel() self._check_roundtrip_table(wp, tm.assert_panel_equal) def test_wide_table_dups(self): wp = tm.makePanel() - try: - store = HDFStore(self.scratchpath) + with ensure_clean(self.path) as store: store._quiet = True store.put('panel', wp, table=True) store.put('panel', wp, table=True, append=True) recons = store['panel'] tm.assert_panel_equal(recons, wp) - finally: - store.close() - os.remove(self.scratchpath) def test_long(self): def _check(left, right): @@ -1484,220 +1555,234 @@ def test_longpanel(self): pass def test_overwrite_node(self): - self.store['a'] = tm.makeTimeDataFrame() - ts = tm.makeTimeSeries() - self.store['a'] = ts - tm.assert_series_equal(self.store['a'], ts) + with ensure_clean(self.path) as store: + store['a'] = tm.makeTimeDataFrame() + ts = tm.makeTimeSeries() + store['a'] = ts + + tm.assert_series_equal(store['a'], ts) def test_select(self): wp = tm.makePanel() - # put/select ok - self.store.remove('wp') - self.store.put('wp', wp, table=True) - self.store.select('wp') - - # non-table ok (where = None) - self.store.remove('wp') - self.store.put('wp2', wp, table=False) - self.store.select('wp2') - - # selection on the non-indexable with a large number of columns - wp = Panel( - np.random.randn(100, 100, 100), items=['Item%03d' % i for i in xrange(100)], - major_axis=date_range('1/1/2000', periods=100), minor_axis=['E%03d' % i for i in xrange(100)]) - - self.store.remove('wp') - self.store.append('wp', wp) - items = ['Item%03d' % i for i in xrange(80)] - result = self.store.select('wp', Term('items', items)) - expected = wp.reindex(items=items) - tm.assert_panel_equal(expected, result) - - # selectin non-table with a where - # self.assertRaises(Exception, self.store.select, - # 'wp2', ('column', ['A', 'D'])) - - # select with columns= - df = tm.makeTimeDataFrame() - self.store.remove('df') - self.store.append('df', df) - result = self.store.select('df', columns=['A', 'B']) - expected = df.reindex(columns=['A', 'B']) - tm.assert_frame_equal(expected, result) - - # equivalentsly - result = self.store.select('df', [('columns', ['A', 'B'])]) - expected = df.reindex(columns=['A', 'B']) - tm.assert_frame_equal(expected, result) - - # with a data column - self.store.remove('df') - self.store.append('df', df, data_columns=['A']) - result = self.store.select('df', ['A > 0'], 
columns=['A', 'B'])
-        expected = df[df.A > 0].reindex(columns=['A', 'B'])
-        tm.assert_frame_equal(expected, result)
-
-        # all a data columns
-        self.store.remove('df')
-        self.store.append('df', df, data_columns=True)
-        result = self.store.select('df', ['A > 0'], columns=['A', 'B'])
-        expected = df[df.A > 0].reindex(columns=['A', 'B'])
-        tm.assert_frame_equal(expected, result)
-
-        # with a data column, but different columns
-        self.store.remove('df')
-        self.store.append('df', df, data_columns=['A'])
-        result = self.store.select('df', ['A > 0'], columns=['C', 'D'])
-        expected = df[df.A > 0].reindex(columns=['C', 'D'])
-        tm.assert_frame_equal(expected, result)
-
-        # with a Timestamp data column (GH #2637)
-        df = DataFrame(dict(ts=bdate_range('2012-01-01', periods=300), A=np.random.randn(300)))
-        self.store.remove('df')
-        self.store.append('df', df, data_columns=['ts', 'A'])
-        result = self.store.select('df', [Term('ts', '>=', Timestamp('2012-02-01'))])
-        expected = df[df.ts >= Timestamp('2012-02-01')]
-        tm.assert_frame_equal(expected, result)
+        with ensure_clean(self.path) as store:
+
+            # put/select ok
+            store.remove('wp')
+            store.put('wp', wp, table=True)
+            store.select('wp')
+
+            # non-table ok (where = None)
+            store.remove('wp')
+            store.put('wp2', wp, table=False)
+            store.select('wp2')
+
+            # selection on the non-indexable with a large number of columns
+            wp = Panel(
+                np.random.randn(100, 100, 100), items=['Item%03d' % i for i in xrange(100)],
+                major_axis=date_range('1/1/2000', periods=100), minor_axis=['E%03d' % i for i in xrange(100)])
+
+            store.remove('wp')
+            store.append('wp', wp)
+            items = ['Item%03d' % i for i in xrange(80)]
+            result = store.select('wp', Term('items', items))
+            expected = wp.reindex(items=items)
+            tm.assert_panel_equal(expected, result)
+
+            # selecting non-table with a where
+            # self.assertRaises(Exception, store.select,
+            #                   'wp2', ('column', ['A', 'D']))
+
+            # select with columns=
+            df = tm.makeTimeDataFrame()
+            store.remove('df')
+            store.append('df', df)
+            result = store.select('df', columns=['A', 'B'])
+            expected = df.reindex(columns=['A', 'B'])
+            tm.assert_frame_equal(expected, result)
+
+            # equivalently
+            result = store.select('df', [('columns', ['A', 'B'])])
+            expected = df.reindex(columns=['A', 'B'])
+            tm.assert_frame_equal(expected, result)
+
+            # with a data column
+            store.remove('df')
+            store.append('df', df, data_columns=['A'])
+            result = store.select('df', ['A > 0'], columns=['A', 'B'])
+            expected = df[df.A > 0].reindex(columns=['A', 'B'])
+            tm.assert_frame_equal(expected, result)
+
+            # all as data columns
+            store.remove('df')
+            store.append('df', df, data_columns=True)
+            result = store.select('df', ['A > 0'], columns=['A', 'B'])
+            expected = df[df.A > 0].reindex(columns=['A', 'B'])
+            tm.assert_frame_equal(expected, result)
+
+            # with a data column, but different columns
+            store.remove('df')
+            store.append('df', df, data_columns=['A'])
+            result = store.select('df', ['A > 0'], columns=['C', 'D'])
+            expected = df[df.A > 0].reindex(columns=['C', 'D'])
+            tm.assert_frame_equal(expected, result)
+
+            # with a Timestamp data column (GH #2637)
+            df = DataFrame(dict(ts=bdate_range('2012-01-01', periods=300), A=np.random.randn(300)))
+            store.remove('df')
+            store.append('df', df, data_columns=['ts', 'A'])
+            result = store.select('df', [Term('ts', '>=', Timestamp('2012-02-01'))])
+            expected = df[df.ts >= Timestamp('2012-02-01')]
+            tm.assert_frame_equal(expected, result)

    def test_panel_select(self):
-        wp = tm.makePanel()
-        self.store.put('wp', wp, table=True)
-        date = wp.major_axis[len(wp.major_axis) // 2]
-
-        crit1 = ('major_axis', '>=', date)
-        crit2 = ('minor_axis', '=', ['A', 'D'])
-        result = self.store.select('wp', [crit1, crit2])
-        expected = wp.truncate(before=date).reindex(minor=['A', 'D'])
-        tm.assert_panel_equal(result, expected)
-
-        result = self.store.select(
-            'wp', ['major_axis>=20000124', ('minor_axis', '=', ['A', 'B'])])
-        expected = wp.truncate(before='20000124').reindex(minor=['A', 'B'])
-        tm.assert_panel_equal(result, expected)
+
+        wp = tm.makePanel()
+        with ensure_clean(self.path) as store:
+            store.put('wp', wp, table=True)
+            date = wp.major_axis[len(wp.major_axis) // 2]
+
+            crit1 = ('major_axis', '>=', date)
+            crit2 = ('minor_axis', '=', ['A', 'D'])
+
+            result = store.select('wp', [crit1, crit2])
+            expected = wp.truncate(before=date).reindex(minor=['A', 'D'])
+            tm.assert_panel_equal(result, expected)
+
+            result = store.select(
+                'wp', ['major_axis>=20000124', ('minor_axis', '=', ['A', 'B'])])
+            expected = wp.truncate(before='20000124').reindex(minor=['A', 'B'])
+            tm.assert_panel_equal(result, expected)

    def test_frame_select(self):
-        df = tm.makeTimeDataFrame()
-        self.store.put('frame', df, table=True)
-        date = df.index[len(df) // 2]
-
-        crit1 = ('index', '>=', date)
-        crit2 = ('columns', ['A', 'D'])
-        crit3 = ('columns', 'A')
-
-        result = self.store.select('frame', [crit1, crit2])
-        expected = df.ix[date:, ['A', 'D']]
-        tm.assert_frame_equal(result, expected)
-
-        result = self.store.select('frame', [crit3])
-        expected = df.ix[:, ['A']]
-        tm.assert_frame_equal(result, expected)
-
-        # other indicies for a frame
-
-        # integer
-        df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
-        self.store.append('df_int', df)
-        self.store.select(
-            'df_int', [Term("index<10"), Term("columns", "=", ["A"])])
-        df = DataFrame(dict(A=np.random.rand(
-            20), B=np.random.rand(20), index=np.arange(20, dtype='f8')))
-        self.store.append('df_float', df)
-        self.store.select(
-            'df_float', [Term("index<10.0"), Term("columns", "=", ["A"])])
-
-        # invalid terms
        df = tm.makeTimeDataFrame()
-        self.store.append('df_time', df)
-        self.assertRaises(
-            Exception, self.store.select, 'df_time', [Term("index>0")])
-
-        # can't select if not written as table
-        # self.store['frame'] = df
-        # self.assertRaises(Exception, self.store.select,
-        #                   'frame', [crit1, crit2])
+        with ensure_clean(self.path) as store:
+            store.put('frame', df, table=True)
+            date = df.index[len(df) // 2]
+
+            crit1 = ('index', '>=', date)
+            crit2 = ('columns', ['A', 'D'])
+            crit3 = ('columns', 'A')
+
+            result = store.select('frame', [crit1, crit2])
+            expected = df.ix[date:, ['A', 'D']]
+            tm.assert_frame_equal(result, expected)
+
+            result = store.select('frame', [crit3])
+            expected = df.ix[:, ['A']]
+            tm.assert_frame_equal(result, expected)
+
+            # other indices for a frame
+
+            # integer
+            df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
+            store.append('df_int', df)
+            store.select(
+                'df_int', [Term("index<10"), Term("columns", "=", ["A"])])
+
+            df = DataFrame(dict(A=np.random.rand(
+                20), B=np.random.rand(20), index=np.arange(20, dtype='f8')))
+            store.append('df_float', df)
+            store.select(
+                'df_float', [Term("index<10.0"), Term("columns", "=", ["A"])])
+
+            # invalid terms
+            df = tm.makeTimeDataFrame()
+            store.append('df_time', df)
+            self.assertRaises(
+                Exception, store.select, 'df_time', [Term("index>0")])
+
+            # can't select if not written as table
+            # store['frame'] = df
+            # self.assertRaises(Exception, store.select,
+            #                   'frame', [crit1, crit2])
+
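The query surface exercised in the selection tests composes three pieces: ``data_columns`` to make individual columns queryable, ``Term`` objects (or their tuple/string shorthands) as row criteria, and ``columns=`` to prune the returned frame. A condensed usage sketch (editorial, not part of the patch; path and data are illustrative)::

    import numpy as np
    from pandas import DataFrame, date_range
    from pandas.io.pytables import HDFStore, Term

    df = DataFrame(np.random.randn(100, 2), columns=['A', 'B'],
                   index=date_range('1/1/2000', periods=100))

    store = HDFStore('tmp_select.h5', mode='w')   # illustrative scratch file
    store.append('df', df, data_columns=['A'])    # 'A' becomes queryable
    # rows where A > 0, returning only column 'A'
    result = store.select('df', ['A > 0'], columns=['A'])
    store.close()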
    def test_unique(self):
+
        df = tm.makeTimeDataFrame()

        def check(x, y):
            self.assert_((np.unique(x) == np.unique(y)).all() == True)

-        self.store.remove('df')
-        self.store.append('df', df)
-
-        # error
-        self.assertRaises(KeyError, self.store.unique, 'df', 'foo')
-
-        # valid
-        result = self.store.unique('df', 'index')
-        check(result.values, df.index.values)
-
-        # not a data indexable column
-        self.assertRaises(
-            ValueError, self.store.unique, 'df', 'values_block_0')
-
-        # a data column
-        df2 = df.copy()
-        df2['string'] = 'foo'
-        self.store.append('df2', df2, data_columns=['string'])
-        result = self.store.unique('df2', 'string')
-        check(result.values, df2['string'].unique())
-
-        # a data column with NaNs, result excludes the NaNs
-        df3 = df.copy()
-        df3['string'] = 'foo'
-        df3.ix[4:6, 'string'] = np.nan
-        self.store.append('df3', df3, data_columns=['string'])
-        result = self.store.unique('df3', 'string')
-        check(result.values, df3['string'].valid().unique())
+        with ensure_clean(self.path) as store:
+            store.remove('df')
+            store.append('df', df)
+
+            # error
+            self.assertRaises(KeyError, store.unique, 'df', 'foo')
+
+            # valid
+            result = store.unique('df', 'index')
+            check(result.values, df.index.values)
+
+            # not a data indexable column
+            self.assertRaises(
+                ValueError, store.unique, 'df', 'values_block_0')
+
+            # a data column
+            df2 = df.copy()
+            df2['string'] = 'foo'
+            store.append('df2', df2, data_columns=['string'])
+            result = store.unique('df2', 'string')
+            check(result.values, df2['string'].unique())
+
+            # a data column with NaNs, result excludes the NaNs
+            df3 = df.copy()
+            df3['string'] = 'foo'
+            df3.ix[4:6, 'string'] = np.nan
+            store.append('df3', df3, data_columns=['string'])
+            result = store.unique('df3', 'string')
+            check(result.values, df3['string'].valid().unique())

    def test_coordinates(self):
        df = tm.makeTimeDataFrame()

-        self.store.remove('df')
-        self.store.append('df', df)
-
-        # all
-        c = self.store.select_as_coordinates('df')
-        assert((c.values == np.arange(len(df.index))).all() == True)
-
-        # get coordinates back & test vs frame
-        self.store.remove('df')
-
-        df = DataFrame(dict(A=range(5), B=range(5)))
-        self.store.append('df', df)
-        c = self.store.select_as_coordinates('df', ['index<3'])
-        assert((c.values == np.arange(3)).all() == True)
-        result = self.store.select('df', where=c)
-        expected = df.ix[0:2, :]
-        tm.assert_frame_equal(result, expected)
-
-        c = self.store.select_as_coordinates('df', ['index>=3', 'index<=4'])
-        assert((c.values == np.arange(2) + 3).all() == True)
-        result = self.store.select('df', where=c)
-        expected = df.ix[3:4, :]
-        tm.assert_frame_equal(result, expected)
-
-        # multiple tables
-        self.store.remove('df1')
-        self.store.remove('df2')
-        df1 = tm.makeTimeDataFrame()
-        df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
-        self.store.append('df1', df1, data_columns=['A', 'B'])
-        self.store.append('df2', df2)
-
-        c = self.store.select_as_coordinates('df1', ['A>0', 'B>0'])
-        df1_result = self.store.select('df1', c)
-        df2_result = self.store.select('df2', c)
-        result = concat([df1_result, df2_result], axis=1)
+        with ensure_clean(self.path) as store:

-        expected = concat([df1, df2], axis=1)
-        expected = expected[(expected.A > 0) & (expected.B > 0)]
-        tm.assert_frame_equal(result, expected)
+            store.remove('df')
+            store.append('df', 
df) + c = store.select_as_coordinates('df', ['index<3']) + assert((c.values == np.arange(3)).all() == True) + result = store.select('df', where=c) + expected = df.ix[0:2, :] + tm.assert_frame_equal(result, expected) + + c = store.select_as_coordinates('df', ['index>=3', 'index<=4']) + assert((c.values == np.arange(2) + 3).all() == True) + result = store.select('df', where=c) + expected = df.ix[3:4, :] + tm.assert_frame_equal(result, expected) + + # multiple tables + store.remove('df1') + store.remove('df2') + df1 = tm.makeTimeDataFrame() + df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x) + store.append('df1', df1, data_columns=['A', 'B']) + store.append('df2', df2) + + c = store.select_as_coordinates('df1', ['A>0', 'B>0']) + df1_result = store.select('df1', c) + df2_result = store.select('df2', c) + result = concat([df1_result, df2_result], axis=1) + + expected = concat([df1, df2], axis=1) + expected = expected[(expected.A > 0) & (expected.B > 0)] + tm.assert_frame_equal(result, expected) def test_append_to_multiple(self): df1 = tm.makeTimeDataFrame() @@ -1705,102 +1790,109 @@ def test_append_to_multiple(self): df2['foo'] = 'bar' df = concat([df1, df2], axis=1) - # exceptions - self.assertRaises(Exception, self.store.append_to_multiple, {'df1': - ['A', 'B'], 'df2': None}, df, selector='df3') - self.assertRaises(Exception, self.store.append_to_multiple, - {'df1': None, 'df2': None}, df, selector='df3') - self.assertRaises( - Exception, self.store.append_to_multiple, 'df1', df, 'df1') - - # regular operation - self.store.append_to_multiple( - {'df1': ['A', 'B'], 'df2': None}, df, selector='df1') - result = self.store.select_as_multiple( - ['df1', 'df2'], where=['A>0', 'B>0'], selector='df1') - expected = df[(df.A > 0) & (df.B > 0)] - tm.assert_frame_equal(result, expected) + with ensure_clean(self.path) as store: + # exceptions + self.assertRaises(Exception, store.append_to_multiple, + {'df1': ['A', 'B'], 'df2': None}, df, selector='df3') + self.assertRaises(Exception, store.append_to_multiple, + {'df1': None, 'df2': None}, df, selector='df3') + self.assertRaises( + Exception, store.append_to_multiple, 'df1', df, 'df1') + + # regular operation + store.append_to_multiple( + {'df1': ['A', 'B'], 'df2': None}, df, selector='df1') + result = store.select_as_multiple( + ['df1', 'df2'], where=['A>0', 'B>0'], selector='df1') + expected = df[(df.A > 0) & (df.B > 0)] + tm.assert_frame_equal(result, expected) + def test_select_as_multiple(self): + df1 = tm.makeTimeDataFrame() df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x) df2['foo'] = 'bar' - self.store.append('df1', df1, data_columns=['A', 'B']) - self.store.append('df2', df2) - - # exceptions - self.assertRaises(Exception, self.store.select_as_multiple, - None, where=['A>0', 'B>0'], selector='df1') - self.assertRaises(Exception, self.store.select_as_multiple, - [None], where=['A>0', 'B>0'], selector='df1') - - # default select - result = self.store.select('df1', ['A>0', 'B>0']) - expected = self.store.select_as_multiple( - ['df1'], where=['A>0', 'B>0'], selector='df1') - tm.assert_frame_equal(result, expected) - expected = self.store.select_as_multiple( - 'df1', where=['A>0', 'B>0'], selector='df1') - tm.assert_frame_equal(result, expected) - - # multiple - result = self.store.select_as_multiple( - ['df1', 'df2'], where=['A>0', 'B>0'], selector='df1') - expected = concat([df1, df2], axis=1) - expected = expected[(expected.A > 0) & (expected.B > 0)] - tm.assert_frame_equal(result, expected) - - # multiple (diff 
selector)
-        result = self.store.select_as_multiple(['df1', 'df2'], where=[Term(
-            'index', '>', df2.index[4])], selector='df2')
-        expected = concat([df1, df2], axis=1)
-        expected = expected[5:]
-        tm.assert_frame_equal(result, expected)
-
-        # test excpection for diff rows
-        self.store.append('df3', tm.makeTimeDataFrame(nper=50))
-        self.assertRaises(Exception, self.store.select_as_multiple, ['df1',
-                          'df3'], where=['A>0', 'B>0'], selector='df1')

-    def test_start_stop(self):
+        with ensure_clean(self.path) as store:
+            store.append('df1', df1, data_columns=['A', 'B'])
+            store.append('df2', df2)
+
+            # exceptions
+            self.assertRaises(Exception, store.select_as_multiple,
+                              None, where=['A>0', 'B>0'], selector='df1')
+            self.assertRaises(Exception, store.select_as_multiple,
+                              [None], where=['A>0', 'B>0'], selector='df1')
+
+            # default select
+            result = store.select('df1', ['A>0', 'B>0'])
+            expected = store.select_as_multiple(
+                ['df1'], where=['A>0', 'B>0'], selector='df1')
+            tm.assert_frame_equal(result, expected)
+            expected = store.select_as_multiple(
+                'df1', where=['A>0', 'B>0'], selector='df1')
+            tm.assert_frame_equal(result, expected)
+
+            # multiple
+            result = store.select_as_multiple(
+                ['df1', 'df2'], where=['A>0', 'B>0'], selector='df1')
+            expected = concat([df1, df2], axis=1)
+            expected = expected[(expected.A > 0) & (expected.B > 0)]
+            tm.assert_frame_equal(result, expected)
+
+            # multiple (diff selector)
+            result = store.select_as_multiple(['df1', 'df2'], where=[Term(
+                'index', '>', df2.index[4])], selector='df2')
+            expected = concat([df1, df2], axis=1)
+            expected = expected[5:]
+            tm.assert_frame_equal(result, expected)
+
+            # test exception for diff rows
+            store.append('df3', tm.makeTimeDataFrame(nper=50))
+            self.assertRaises(Exception, store.select_as_multiple,
+                              ['df1','df3'], where=['A>0', 'B>0'], selector='df1')

    def test_start_stop(self):
-        df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
-        self.store.append('df', df)
-
-        result = self.store.select(
-            'df', [Term("columns", "=", ["A"])], start=0, stop=5)
-        expected = df.ix[0:4, ['A']]
-        tm.assert_frame_equal(result, expected)
-
-        # out of range
-        result = self.store.select(
-            'df', [Term("columns", "=", ["A"])], start=30, stop=40)
-        assert(len(result) == 0)
-        assert(type(result) == DataFrame)
+
+        with ensure_clean(self.path) as store:
+
+            df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
+            store.append('df', df)
+
+            result = store.select(
+                'df', [Term("columns", "=", ["A"])], start=0, stop=5)
+            expected = df.ix[0:4, ['A']]
+            tm.assert_frame_equal(result, expected)
+
+            # out of range
+            result = store.select(
+                'df', [Term("columns", "=", ["A"])], start=30, stop=40)
+            assert(len(result) == 0)
+            assert(type(result) == DataFrame)

    def test_select_filter_corner(self):
+
        df = DataFrame(np.random.randn(50, 100))
        df.index = ['%.3d' % c for c in df.index]
        df.columns = ['%.3d' % c for c in df.columns]
-        self.store.put('frame', df, table=True)
-        crit = Term('columns', df.columns[:75])
-        result = self.store.select('frame', [crit])
-        tm.assert_frame_equal(result, df.ix[:, df.columns[:75]])
+        with ensure_clean(self.path) as store:
+            store.put('frame', df, table=True)
+            crit = Term('columns', df.columns[:75])
+            result = store.select('frame', [crit])
+            tm.assert_frame_equal(result, df.ix[:, df.columns[:75]])
+
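The legacy-file tests below route every cleanup through ``safe_close`` and ``safe_remove`` instead of bare ``store.close()`` / ``os.remove`` calls, so a failed open cannot cascade into a second error. Both helpers are added earlier in this patch; they are presumably thin best-effort wrappers along these lines (editorial sketch; the real definitions may differ)::

    import os

    def safe_remove(path):
        # best-effort removal of a scratch file
        if path is not None:
            try:
                os.remove(path)
            except:
                pass

    def safe_close(store):
        # close a store handle, swallowing errors from stores
        # that were never successfully opened
        try:
            if store is not None:
                store.close()
        except:
            pass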
    def _check_roundtrip(self, obj, comparator, compression=False, **kwargs):
+
        options = {}
        if compression:
            options['complib'] = _default_compressor

-        store = HDFStore(self.scratchpath, 'w', **options)
-        try:
+        with ensure_clean(self.path, 'w', **options) as store:
            store['obj'] = obj
            retrieved = store['obj']
            comparator(retrieved, obj, **kwargs)
-        finally:
-            store.close()
-            os.remove(self.scratchpath)

    def _check_double_roundtrip(self, obj, comparator, compression=False,
                                **kwargs):
@@ -1808,84 +1900,90 @@ def _check_double_roundtrip(self, obj, comparator, compression=False,
        if compression:
            options['complib'] = _default_compressor

-        store = HDFStore(self.scratchpath, 'w', **options)
-        try:
+        with ensure_clean(self.path, 'w', **options) as store:
            store['obj'] = obj
            retrieved = store['obj']
            comparator(retrieved, obj, **kwargs)
            store['obj'] = retrieved
            again = store['obj']
            comparator(again, obj, **kwargs)
-        finally:
-            store.close()
-            os.remove(self.scratchpath)
+

    def _check_roundtrip_table(self, obj, comparator, compression=False):
        options = {}
        if compression:
            options['complib'] = _default_compressor

-        store = HDFStore(self.scratchpath, 'w', **options)
-        try:
+        with ensure_clean(self.path, 'w', **options) as store:
            store.put('obj', obj, table=True)
            retrieved = store['obj']
            # sorted_obj = _test_sort(obj)
            comparator(retrieved, obj)
-        finally:
-            store.close()
-            os.remove(self.scratchpath)

    def test_pytables_native_read(self):
        pth = curpath()
-        store = HDFStore(os.path.join(pth, 'pytables_native.h5'), 'r')
-        d2 = store['detector/readout']
-        store.close()

-        store = HDFStore(os.path.join(pth, 'pytables_native2.h5'), 'r')
-        str(store)
-        d1 = store['detector']
-        store.close()
+
+        try:
+            store = HDFStore(os.path.join(pth, 'pytables_native.h5'), 'r')
+            d2 = store['detector/readout']
+        finally:
+            safe_close(store)
+
+        try:
+            store = HDFStore(os.path.join(pth, 'pytables_native2.h5'), 'r')
+            str(store)
+            d1 = store['detector']
+        finally:
+            safe_close(store)

    def test_legacy_read(self):
        pth = curpath()
-        store = HDFStore(os.path.join(pth, 'legacy.h5'), 'r')
-        store['a']
-        store['b']
-        store['c']
-        store['d']
-        store.close()
+        try:
+            store = HDFStore(os.path.join(pth, 'legacy.h5'), 'r')
+            store['a']
+            store['b']
+            store['c']
+            store['d']
+        finally:
+            safe_close(store)

    def test_legacy_table_read(self):
        # legacy table types
        pth = curpath()
-        store = HDFStore(os.path.join(pth, 'legacy_table.h5'), 'r')
-        store.select('df1')
-        store.select('df2')
-        store.select('wp1')
-
-        # force the frame
-        store.select('df2', typ='legacy_frame')
-
-        # old version warning
-        warnings.filterwarnings('ignore', category=IncompatibilityWarning)
-        self.assertRaises(
-            Exception, store.select, 'wp1', Term('minor_axis', '=', 'B'))
-
-        df2 = store.select('df2')
-        store.select('df2', Term('index', '>', df2.index[2]))
-        warnings.filterwarnings('always', category=IncompatibilityWarning)
+        try:
+            store = HDFStore(os.path.join(pth, 'legacy_table.h5'), 'r')
+            store.select('df1')
+            store.select('df2')
+            store.select('wp1')
+
+            # force the frame
+            store.select('df2', typ='legacy_frame')
+
+            # old version warning
+            warnings.filterwarnings('ignore', category=IncompatibilityWarning)
+            self.assertRaises(
+                Exception, store.select, 'wp1', Term('minor_axis', '=', 'B'))

-        store.close()
+            df2 = store.select('df2')
+            store.select('df2', Term('index', '>', df2.index[2]))
+            warnings.filterwarnings('always', category=IncompatibilityWarning)
+
+        finally:
+            safe_close(store)

    def test_legacy_0_10_read(self):
        # legacy from 0.10
        pth = curpath()
-        store = HDFStore(os.path.join(pth, 'legacy_0.10.h5'), 'r')
-        for k in store.keys():
-            store.select(k)
-        store.close()
+        try:
+            store = HDFStore(os.path.join(pth, 'legacy_0.10.h5'), 'r')
+            for k in store.keys():
+                store.select(k)
+
finally: + safe_close(store) def test_copy(self): pth = curpath() + def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs): try: import os @@ -1893,6 +1991,7 @@ def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs): if f is None: f = os.path.join(pth, 'legacy_0.10.h5') + store = HDFStore(f, 'r') if new_f is None: @@ -1920,13 +2019,9 @@ def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs): except (Exception), detail: pass finally: - store.close() - tstore.close() - import os - try: - os.remove(new_f) - except: - pass + safe_close(store) + safe_close(tstore) + safe_remove(new_f) do_copy() do_copy(keys = ['df']) @@ -1934,18 +2029,19 @@ def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs): # new table df = tm.makeDataFrame() + try: - st = HDFStore(self.scratchpath) + st = HDFStore(self.path) st.append('df', df, data_columns = ['A']) st.close() - do_copy(f = self.scratchpath) - do_copy(f = self.scratchpath, propindexes = False) + do_copy(f = self.path) + do_copy(f = self.path, propindexes = False) finally: - import os - os.remove(self.scratchpath) + safe_remove(self.path) def test_legacy_table_write(self): raise nose.SkipTest + # legacy table types pth = curpath() df = tm.makeDataFrame() @@ -1959,71 +2055,78 @@ def test_legacy_table_write(self): store.close() def test_store_datetime_fractional_secs(self): - dt = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456) - series = Series([0], [dt]) - self.store['a'] = series - self.assertEquals(self.store['a'].index[0], dt) - - def test_tseries_indices_series(self): - idx = tm.makeDateIndex(10) - ser = Series(np.random.randn(len(idx)), idx) - self.store['a'] = ser - result = self.store['a'] - assert_series_equal(result, ser) - self.assertEquals(type(result.index), type(ser.index)) - self.assertEquals(result.index.freq, ser.index.freq) + with ensure_clean(self.path) as store: + dt = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456) + series = Series([0], [dt]) + store['a'] = series + self.assertEquals(store['a'].index[0], dt) - idx = tm.makePeriodIndex(10) - ser = Series(np.random.randn(len(idx)), idx) - self.store['a'] = ser - result = self.store['a'] + def test_tseries_indices_series(self): - assert_series_equal(result, ser) - self.assertEquals(type(result.index), type(ser.index)) - self.assertEquals(result.index.freq, ser.index.freq) + with ensure_clean(self.path) as store: + idx = tm.makeDateIndex(10) + ser = Series(np.random.randn(len(idx)), idx) + store['a'] = ser + result = store['a'] + + assert_series_equal(result, ser) + self.assertEquals(type(result.index), type(ser.index)) + self.assertEquals(result.index.freq, ser.index.freq) + + idx = tm.makePeriodIndex(10) + ser = Series(np.random.randn(len(idx)), idx) + store['a'] = ser + result = store['a'] + + assert_series_equal(result, ser) + self.assertEquals(type(result.index), type(ser.index)) + self.assertEquals(result.index.freq, ser.index.freq) def test_tseries_indices_frame(self): - idx = tm.makeDateIndex(10) - df = DataFrame(np.random.randn(len(idx), 3), index=idx) - self.store['a'] = df - result = self.store['a'] - - assert_frame_equal(result, df) - self.assertEquals(type(result.index), type(df.index)) - self.assertEquals(result.index.freq, df.index.freq) - - idx = tm.makePeriodIndex(10) - df = DataFrame(np.random.randn(len(idx), 3), idx) - self.store['a'] = df - result = self.store['a'] - assert_frame_equal(result, df) - self.assertEquals(type(result.index), type(df.index)) - 
self.assertEquals(result.index.freq, df.index.freq) + with ensure_clean(self.path) as store: + idx = tm.makeDateIndex(10) + df = DataFrame(np.random.randn(len(idx), 3), index=idx) + store['a'] = df + result = store['a'] + + assert_frame_equal(result, df) + self.assertEquals(type(result.index), type(df.index)) + self.assertEquals(result.index.freq, df.index.freq) + + idx = tm.makePeriodIndex(10) + df = DataFrame(np.random.randn(len(idx), 3), idx) + store['a'] = df + result = store['a'] + + assert_frame_equal(result, df) + self.assertEquals(type(result.index), type(df.index)) + self.assertEquals(result.index.freq, df.index.freq) def test_unicode_index(self): + unicode_values = [u'\u03c3', u'\u03c3\u03c3'] warnings.filterwarnings('ignore', category=PerformanceWarning) s = Series(np.random.randn(len(unicode_values)), unicode_values) self._check_roundtrip(s, tm.assert_series_equal) warnings.filterwarnings('always', category=PerformanceWarning) - def test_store_datetime_mixed(self): + df = DataFrame( {'a': [1, 2, 3], 'b': [1., 2., 3.], 'c': ['a', 'b', 'c']}) ts = tm.makeTimeSeries() df['d'] = ts.index[:3] self._check_roundtrip(df, tm.assert_frame_equal) - + # def test_cant_write_multiindex_table(self): # # for now, #1848 # df = DataFrame(np.random.randn(10, 4), # index=[np.arange(5).repeat(2), # np.tile(np.arange(2), 5)]) - # self.assertRaises(Exception, self.store.put, 'foo', df, table=True) + # self.assertRaises(Exception, store.put, 'foo', df, table=True) def curpath():