diff --git a/RELEASE.rst b/RELEASE.rst
index 59a86221d14a9..245e72d6bca6e 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -52,6 +52,7 @@ pandas 0.10.1
    - added method ``unique`` to select the unique values in an indexable or data column
    - added method ``copy`` to copy an existing store (and possibly upgrade)
    - show the shape of the data on disk for non-table stores when printing the store
+    - added ability to read PyTables flavor tables (allows compatibility with other HDF5 systems)
  - Add ``logx`` option to DataFrame/Series.plot (GH2327_, #2565)
  - Support reading gzipped data from file-like object
  - ``pivot_table`` aggfunc can be anything used in GroupBy.aggregate (GH2643_)
@@ -66,6 +67,8 @@ pandas 0.10.1
    - handle correctly ``Term`` passed types (e.g. ``index<1000``, when
      index is ``Int64``), (closes GH512_)
    - handle Timestamp correctly in data_columns (closes GH2637_)
+    - ``__contains__`` correctly matches on non-natural names (GH2694_)
+    - correctly store ``float32`` dtypes in tables (if no other float types are in the same table)
  - Fix DataFrame.info bug with UTF8-encoded columns. (GH2576_)
  - Fix DatetimeIndex handling of FixedOffset tz (GH2604_)
  - More robust detection of being in IPython session for wide DataFrame
@@ -86,6 +89,7 @@ pandas 0.10.1
    - refactored HDFStore to deal with non-table stores as objects, will allow future enhancements
    - removed keyword ``compression`` from ``put`` (replaced by keyword ``complib`` to be consistent across library)
+    - warn with a ``PerformanceWarning`` if you are attempting to store types that will be pickled by PyTables

 .. _GH512: https://github.com/pydata/pandas/issues/512
 .. _GH1277: https://github.com/pydata/pandas/issues/1277
@@ -98,6 +102,7 @@ pandas 0.10.1
 .. _GH2625: https://github.com/pydata/pandas/issues/2625
 .. _GH2643: https://github.com/pydata/pandas/issues/2643
 .. _GH2637: https://github.com/pydata/pandas/issues/2637
+.. _GH2694: https://github.com/pydata/pandas/issues/2694

 pandas 0.10.0
 =============
diff --git a/doc/source/io.rst b/doc/source/io.rst
index 1b61de7bf8281..6b7ec3dfdd841 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -1211,7 +1211,7 @@ You can create/modify an index for a table with ``create_table_index`` after dat
 Query via Data Columns
 ~~~~~~~~~~~~~~~~~~~~~~

-You can designate (and index) certain columns that you want to be able to perform queries (other than the `indexable` columns, which you can always query). For instance say you want to perform this common operation, on-disk, and return just the frame that matches this query.
+You can designate (and index) certain columns that you want to be able to perform queries (other than the `indexable` columns, which you can always query). For instance, say you want to perform this common operation, on-disk, and return just the frame that matches this query. You can specify ``data_columns=True`` to force all columns to be data columns.

 .. ipython:: python

@@ -1260,7 +1260,7 @@ To retrieve the *unique* values of an indexable or data column, use the method `

     concat([ store.select('df_dc',c) for c in [ crit1, crit2 ] ])

-**Table Object**
+**Storer Object**

 If you want to inspect the stored object, retrieve via ``get_storer``. You could use this programmatically to, say, get the number of rows in an object.
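A minimal sketch of that pattern (it assumes the ``store`` and the appended ``df_dc`` table from the surrounding examples, and that the returned storer exposes an ``nrows`` attribute):

 .. ipython:: python

    # illustrative only: fetch the storer for a stored table,
    # then ask it for its on-disk row count
    store.get_storer('df_dc').nrows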
@@ -1363,17 +1363,40 @@ Notes & Caveats

       # we have provided a minimum minor_axis indexable size
       store.root.wp_big_strings.table

-Compatibility
-~~~~~~~~~~~~~
+External Compatibility
+~~~~~~~~~~~~~~~~~~~~~~
+
+``HDFStore`` writes storer objects in specific formats suitable for producing lossless round-trips to pandas objects. For external compatibility, ``HDFStore`` can read native ``PyTables`` format tables. It is possible to write an ``HDFStore`` object that can easily be imported into ``R`` using the ``rhdf5`` library. Create a table format store like this:
+
+  .. ipython:: python
+
+     store_export = HDFStore('export.h5')
+     store_export.append('df_dc',df_dc,data_columns=df_dc.columns)
+     store_export
+
+  .. ipython:: python
+     :suppress:
+
+     store_export.close()
+     import os
+     os.remove('export.h5')
+
+Backwards Compatibility
+~~~~~~~~~~~~~~~~~~~~~~~

 0.10.1 of ``HDFStore`` is backwards compatible for reading tables created in a prior version of pandas; however, query terms using the prior (undocumented) methodology are unsupported. ``HDFStore`` will issue a warning if you try to use a prior-version format file. You must read in the entire file and write it out using the new format, using the method ``copy`` to take advantage of the updates. The group attribute ``pandas_version`` contains the version information. ``copy`` takes a number of options, please see the docstring.

+  .. ipython:: python
+     :suppress:
+
+     import os
+     legacy_file_path = os.path.abspath('source/_static/legacy_0.10.h5')
+
 .. ipython:: python

    # a legacy store
-   import os
-   legacy_store = HDFStore('legacy_0.10.h5', 'r')
+   legacy_store = HDFStore(legacy_file_path,'r')
    legacy_store

    # copy (and return the new handle)
@@ -1397,6 +1420,7 @@ Performance
  - You can pass ``chunksize=an integer`` to ``append``, to change the writing chunksize (default is 50000). This will significantly lower your memory usage on writing.
  - You can pass ``expectedrows=an integer`` to the first ``append``, to set the TOTAL number of rows that ``PyTables`` will expect. This will optimize read/write performance.
  - Duplicate rows can be written to tables, but are filtered out in selection (with the last items being selected; thus a table is unique on major, minor pairs)
+ - A ``PerformanceWarning`` will be raised if you are attempting to store types that will be pickled by PyTables (rather than stored as endemic types). See for more information and some solutions.

 Experimental
 ~~~~~~~~~~~~
diff --git a/doc/source/v0.10.1.txt b/doc/source/v0.10.1.txt
index 2eb40b2823214..8aa2dad2b35a0 100644
--- a/doc/source/v0.10.1.txt
+++ b/doc/source/v0.10.1.txt
@@ -119,12 +119,15 @@ Multi-table creation via ``append_to_multiple`` and selection via ``select_as_mu

 **Enhancements**

+- ``HDFStore`` can now read native PyTables table format tables
 - You can pass ``nan_rep = 'my_nan_rep'`` to append, to change the default nan representation on disk (which converts to/from `np.nan`), this defaults to `nan`.
 - You can pass ``index`` to ``append``. This defaults to ``True``. This will automagically create indices on the *indexables* and *data columns* of the table
 - You can pass ``chunksize=an integer`` to ``append``, to change the writing chunksize (default is 50000). This will significantly lower your memory usage on writing.
 - You can pass ``expectedrows=an integer`` to the first ``append``, to set the TOTAL number of rows that ``PyTables`` will expect. This will optimize read/write performance.
 - ``Select`` now supports passing ``start`` and ``stop`` to provide selection space limiting in selection.

+**Bug Fixes**
+
+- ``HDFStore`` tables can now store ``float32`` types correctly (they cannot be mixed with ``float64`` types in the same table, however)

 See the `full release notes `__ or issue tracker
diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py
index 571bcf5008178..40c4dc6e5efe7 100644
--- a/pandas/core/reshape.py
+++ b/pandas/core/reshape.py
@@ -835,4 +835,4 @@ def block2d_to_blocknd(values, items, shape, labels, ref_items=None):
 def factor_indexer(shape, labels):
     """ given a tuple of shape and a list of Factor labels, return the expanded label indexer """
     mult = np.array(shape)[::-1].cumprod()[::-1]
-    return np.sum(np.array(labels).T * np.append(mult, [1]), axis=1).T
+    return com._ensure_platform_int(np.sum(np.array(labels).T * np.append(mult, [1]), axis=1).T)
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index b7cdf1706b5e9..78bd204f26993 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -24,6 +24,7 @@
 from pandas.core.common import _asarray_tuplesafe, _try_sort
 from pandas.core.internals import BlockManager, make_block, form_blocks
 from pandas.core.reshape import block2d_to_block3d, block2d_to_blocknd, factor_indexer
+from pandas.core.index import Int64Index
 import pandas.core.common as com
 from pandas.tools.merge import concat

@@ -41,6 +42,11 @@ class IncompatibilityWarning(Warning): pass
 where criteria is being ignored as this version [%s] is too old (or not-defined), read the file in and write it out to a new file to upgrade (with the copy_to method)
 """

+class PerformanceWarning(Warning): pass
+performance_doc = """
+your performance may suffer as PyTables will pickle object types that it cannot map
+directly to c-types [inferred_type->%s,key->%s]
+"""

 # map object types
 _TYPE_MAP = {
@@ -71,6 +77,7 @@ class IncompatibilityWarning(Warning): pass

 # table class map
 _TABLE_MAP = {
+    'generic_table'         : 'GenericTable',
     'appendable_frame'      : 'AppendableFrameTable',
     'appendable_multiframe' : 'AppendableMultiFrameTable',
     'appendable_panel'      : 'AppendablePanelTable',
@@ -220,7 +227,7 @@ def __contains__(self, key):
         node = self.get_node(key)
         if node is not None:
             name = node._v_pathname
-            return re.search(key, name) is not None
+            if name == key or name[1:] == key: return True
         return False

     def __len__(self):
@@ -508,7 +515,7 @@ def append(self, key, value, columns=None, **kwargs):

         Optional Parameters
         -------------------
-        data_columns : list of columns to create as data columns
+        data_columns : list of columns to create as data columns, or True to use all columns
         min_itemsize : dict of columns that specify minimum string sizes
         nan_rep      : string to use as string nan representation
         chunksize    : size to chunk the writing
@@ -609,7 +616,8 @@ def create_table_index(self, key, **kwargs):

     def groups(self):
         """ return a list of all the top-level nodes (that are not themselves a pandas storage object) """
-        return [ g for g in self.handle.walkGroups() if getattr(g._v_attrs,'pandas_type',None) ]
+        _tables()
+        return [ g for g in self.handle.walkNodes() if getattr(g._v_attrs,'pandas_type',None) or getattr(g,'table',None) or (isinstance(g,_table_mod.table.Table) and g._v_name != 'table') ]

     def get_node(self, key):
         """ return the node with the key or None if it does not exist """
@@ -684,16 +692,23 @@ def error(t):
         # infer the pt from the passed value
         if pt is None:
             if value is None:
-                raise Exception("cannot create a storer if the object is not existing nor a value are passed")
-            try:
-                pt = _TYPE_MAP[type(value)]
-            except:
-                error('_TYPE_MAP')
+                _tables()
+                if getattr(group,'table',None) or isinstance(group,_table_mod.table.Table):
+                    pt = 'frame_table'
+                    tt = 'generic_table'
+                else:
+                    raise Exception("cannot create a storer if the object does not exist and no value is passed")
+            else:
+
+                try:
+                    pt = _TYPE_MAP[type(value)]
+                except:
+                    error('_TYPE_MAP')

-            # we are actually a table
-            if table or append:
-                pt += '_table'
+                # we are actually a table
+                if table or append:
+                    pt += '_table'

         # a storer node
         if 'table' not in pt:
@@ -959,6 +974,24 @@ def set_attr(self):
         """ set the kind for this column """
         setattr(self.attrs, self.kind_attr, self.kind)

+class GenericIndexCol(IndexCol):
+    """ an index which is not represented in the data of the table """
+
+    @property
+    def is_indexed(self):
+        return False
+
+    def convert(self, values, nan_rep):
+        """ set the values from this selection """
+
+        self.values = Int64Index(np.arange(self.table.nrows))
+        return self
+
+    def get_attr(self):
+        pass
+
+    def set_attr(self):
+        pass

 class DataCol(IndexCol):
     """ a data holding column, by definition this is not indexable
@@ -1096,7 +1129,7 @@ def get_atom_data(self, block):
     def set_atom_data(self, block):
         self.kind = block.dtype.name
         self.typ = self.get_atom_data(block)
-        self.set_data(block.values.astype(self.typ._deftype))
+        self.set_data(block.values.astype(self.typ.type))

     def get_atom_datetime64(self, block):
         return _tables().Int64Col(shape=block.shape[0])
@@ -1194,6 +1227,12 @@ def get_atom_data(self, block):
     def get_atom_datetime64(self, block):
         return _tables().Int64Col()

+class GenericDataIndexableCol(DataIndexableCol):
+    """ represent a generic pytables data column """
+
+    def get_attr(self):
+        pass
+
 class Storer(object):
     """ represent an object in my store
         facilitate read/write of various types of objects
@@ -1238,6 +1277,8 @@ def __repr__(self):
         self.infer_axes()
         s = self.shape
         if s is not None:
+            if isinstance(s, (list,tuple)):
+                s = "[%s]" % ','.join([ str(x) for x in s ])
             return "%-12.12s (shape->%s)" % (self.pandas_type,s)
         return self.pandas_type

@@ -1570,6 +1611,17 @@ def write_array(self, key, value):
             return

         if value.dtype.type == np.object_:
+
+            # infer the type, warn if we have a non-string type here (for performance)
+            inferred_type = lib.infer_dtype(value.flatten())
+            if empty_array:
+                pass
+            elif inferred_type == 'string':
+                pass
+            else:
+                ws = performance_doc % (inferred_type,key)
+                warnings.warn(ws, PerformanceWarning)
+
             vlarr = self.handle.createVLArray(self.group, key, _tables().ObjectAtom())
             vlarr.append(value)

@@ -1618,7 +1670,7 @@ class SeriesStorer(GenericStorer):
     @property
     def shape(self):
         try:
-            return "[%s]" % len(getattr(self.group,'values',None))
+            return len(getattr(self.group,'values')),
         except:
             return None
@@ -1748,7 +1800,7 @@ def shape(self):
             if self.is_shape_reversed:
                 shape = shape[::-1]

-            return "[%s]" % ','.join([ str(x) for x in shape ])
+            return shape
         except:
             return None
@@ -1810,7 +1862,7 @@ class Table(Storer):
         index_axes    : a list of tuples of the (original indexing axis and index column)
         non_index_axes: a list of tuples of the (original index axis and columns on a non-indexing axis)
         values_axes   : a list of the columns which comprise the data of this table
-        data_columns  : a list of the columns that we are allowing indexing (these become single columns in values_axes)
+        data_columns  : a list of the columns that we are allowing indexing (these become single columns in values_axes), or True to force all columns
        nan_rep       : the string to use for nan representations for string objects
        levels        : the names of levels
@@ -1908,7 +1960,7 @@ def is_transposed(self):
     @property
     def data_orientation(self):
         """ return a tuple of my permuted axes, non_indexable at the front """
-        return tuple(itertools.chain([a[0] for a in self.non_index_axes], [a.axis for a in self.index_axes]))
+        return tuple(itertools.chain([int(a[0]) for a in self.non_index_axes], [int(a.axis) for a in self.index_axes]))

     def queryables(self):
         """ return a dict of the kinds allowable columns for this object """
@@ -2075,7 +2127,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
             validate: validate the obj against an existing object already written
             min_itemsize: a dict of the min size for a column in bytes
             nan_rep : a value to use for string column nan_rep
-            data_columns : a list of columns that we want to create separately to allow indexing
+            data_columns : a list of columns that we want to create separately to allow indexing (or True to force all columns)

         """

@@ -2109,12 +2161,6 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
             nan_rep = 'nan'
         self.nan_rep = nan_rep

-        # convert the objects if we can to better divine dtypes
-        try:
-            obj = obj.convert_objects()
-        except:
-            pass
-
         # create axes to index and non_index
         index_axes_map = dict()
         for i, a in enumerate(obj.axes):
@@ -2160,6 +2206,9 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
         if data_columns is not None and len(self.non_index_axes):
             axis = self.non_index_axes[0][0]
             axis_labels = self.non_index_axes[0][1]
+            if data_columns is True:
+                data_columns = axis_labels
+
             data_columns = [c for c in data_columns if c in axis_labels]
             if len(data_columns):
                 blocks = block_obj.reindex_axis(Index(axis_labels) - Index(
@@ -2202,7 +2251,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
             except (NotImplementedError):
                 raise
             except (Exception), detail:
-                raise Exception("cannot find the correct atom type -> [dtype->%s] %s" % (b.dtype.name, str(detail)))
+                raise Exception("cannot find the correct atom type -> [dtype->%s,items->%s] %s" % (b.dtype.name, b.items, str(detail)))
             j += 1

         # validate the axes if we have an existing table
@@ -2517,8 +2566,6 @@ def write_data_chunk(self, indexes, mask, search, values):
             self.table.append(rows)
             self.table.flush()
         except (Exception), detail:
-            import pdb
-            pdb.set_trace()
             raise Exception(
                 "tables cannot write this data -> %s" % str(detail))

@@ -2630,6 +2677,51 @@ def read(self, where=None, columns=None, **kwargs):

         return df

+class GenericTable(AppendableFrameTable):
+    """ a table that reads/writes the generic pytables table format """
+    pandas_kind = 'frame_table'
+    table_type  = 'generic_table'
+    ndim        = 2
+    obj_type    = DataFrame
+
+    @property
+    def pandas_type(self):
+        return self.pandas_kind
+
+    @property
+    def storable(self):
+        return getattr(self.group,'table',None) or self.group
+
+    def get_attrs(self):
+        """ retrieve our attributes """
+        self.non_index_axes = []
+        self.nan_rep        = None
+        self.levels         = []
+        t = self.table
+        self.index_axes     = [ a.infer(t) for a in self.indexables if a.is_an_indexable ]
+        self.values_axes    = [ a.infer(t) for a in self.indexables if not a.is_an_indexable ]
+        self.data_columns   = [ a.name for a in self.values_axes ]

+    @property
+    def indexables(self):
+        """ create the indexables from the table description """
+        if self._indexables is None:
+
+            d = self.description
+
+            # the index column is just a simple index
+            self._indexables = [ GenericIndexCol(name='index',axis=0) ]
+
+            for i, n in enumerate(d._v_names):
+
+                dc = GenericDataIndexableCol(name = n, pos=i, values = [ n ], version = self.version)
+                self._indexables.append(dc)
+
+        return self._indexables
+
+    def write(self, **kwargs):
+        raise NotImplementedError("cannot write on a generic table")
+
 class AppendableMultiFrameTable(AppendableFrameTable):
     """ a frame with a multi-index """
     table_type = 'appendable_multiframe'
@@ -2643,6 +2735,8 @@ def table_type_short(self):
     def write(self, obj, data_columns=None, **kwargs):
         if data_columns is None:
             data_columns = []
+        elif data_columns is True:
+            data_columns = obj.columns[:]
         for n in obj.index.names:
             if n not in data_columns:
                 data_columns.insert(0, n)
diff --git a/pandas/io/tests/pytables_native.h5 b/pandas/io/tests/pytables_native.h5
new file mode 100644
index 0000000000000..a01b0f1dca3c0
Binary files /dev/null and b/pandas/io/tests/pytables_native.h5 differ
diff --git a/pandas/io/tests/pytables_native2.h5 b/pandas/io/tests/pytables_native2.h5
new file mode 100644
index 0000000000000..4786eea077533
Binary files /dev/null and b/pandas/io/tests/pytables_native2.h5 differ
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index cb2d9dd2af58f..5e0fe8d292e16 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -9,7 +9,7 @@

 from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range,
                     date_range, Index)
-from pandas.io.pytables import HDFStore, get_store, Term, IncompatibilityWarning
+from pandas.io.pytables import HDFStore, get_store, Term, IncompatibilityWarning, PerformanceWarning
 import pandas.util.testing as tm
 from pandas.tests.test_series import assert_series_equal
 from pandas.tests.test_frame import assert_frame_equal
@@ -111,6 +111,12 @@ def test_contains(self):
         self.assert_('/foo/b' not in self.store)
         self.assert_('bar' not in self.store)

+        # GH 2694
+        warnings.filterwarnings('ignore', category=tables.NaturalNameWarning)
+        self.store['node())'] = tm.makeDataFrame()
+        self.assert_('node())' in self.store)
+        warnings.filterwarnings('always', category=tables.NaturalNameWarning)
+
     def test_versioning(self):
         self.store['a'] = tm.makeTimeSeries()
         self.store['b'] = tm.makeDataFrame()
@@ -254,6 +260,28 @@ def test_put_integer(self):
         df = DataFrame(np.random.randn(50, 100))
         self._check_roundtrip(df, tm.assert_frame_equal)

+    def test_put_mixed_type(self):
+        df = tm.makeTimeDataFrame()
+        df['obj1'] = 'foo'
+        df['obj2'] = 'bar'
+        df['bool1'] = df['A'] > 0
+        df['bool2'] = df['B'] > 0
+        df['bool3'] = True
+        df['int1'] = 1
+        df['int2'] = 2
+        df['timestamp1'] = Timestamp('20010102')
+        df['timestamp2'] = Timestamp('20010103')
+        df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0)
+        df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0)
+        df.ix[3:6, ['obj1']] = np.nan
+        df = df.consolidate().convert_objects()
+        self.store.remove('df')
+        warnings.filterwarnings('ignore', category=PerformanceWarning)
+        self.store.put('df',df)
+        expected = self.store.get('df')
+        tm.assert_frame_equal(expected,df)
+        warnings.filterwarnings('always', category=PerformanceWarning)
+
     def test_append(self):

         df = tm.makeTimeDataFrame()
@@ -697,7 +725,7 @@ def test_big_table_frame(self):
         print "\nbig_table frame [%s] -> %5.2f" % (rows, time.time() - x)

     def test_big_table2_frame(self):
-        # this is a really big table: 2.5m rows x 300 float columns, 20 string
+        # this is a really big table: 1m rows x 60 float columns, 20 string, 20 datetime
         # columns
        raise nose.SkipTest('no big table2 frame')

@@ -705,10 +733,12 @@ def test_big_table2_frame(self):
         print "\nbig_table2 start"
         import time
         start_time = time.time()
-        df = DataFrame(np.random.randn(2.5 * 1000 * 1000, 300), index=range(int(
-            2.5 * 1000 * 1000)), columns=['E%03d' % i for i in xrange(300)])
-        for x in range(20):
+        df = DataFrame(np.random.randn(1000 * 1000, 60), index=xrange(int(
+            1000 * 1000)), columns=['E%03d' % i for i in xrange(60)])
+        for x in xrange(20):
             df['String%03d' % x] = 'string%03d' % x
+        for x in xrange(20):
+            df['datetime%03d' % x] = datetime.datetime(2001, 1, 2, 0, 0)

         print "\nbig_table2 frame (creation of df) [rows->%s] -> %5.2f" % (len(df.index), time.time() - start_time)
         fn = 'big_table2.h5'
@@ -722,7 +752,7 @@ def f(chunksize):
             store.close()
             return r

-        for c in [10000, 50000, 100000, 250000]:
+        for c in [10000, 50000, 250000]:
             start_time = time.time()
             print "big_table2 frame [chunk->%s]" % c
             rows = f(c)
@@ -731,6 +761,35 @@ def f(chunksize):
         finally:
             os.remove(fn)

+    def test_big_put_frame(self):
+        raise nose.SkipTest('no big put frame')
+
+        print "\nbig_put start"
+        import time
+        start_time = time.time()
+        df = DataFrame(np.random.randn(1000 * 1000, 60), index=xrange(int(
+            1000 * 1000)), columns=['E%03d' % i for i in xrange(60)])
+        for x in xrange(20):
+            df['String%03d' % x] = 'string%03d' % x
+        for x in xrange(20):
+            df['datetime%03d' % x] = datetime.datetime(2001, 1, 2, 0, 0)
+
+        print "\nbig_put frame (creation of df) [rows->%s] -> %5.2f" % (len(df.index), time.time() - start_time)
+        fn = 'big_put.h5'
+
+        try:
+
+            start_time = time.time()
+            store = HDFStore(fn, mode='w')
+            store.put('df', df)
+            store.close()
+
+            print df.get_dtype_counts()
+            print "big_put frame [shape->%s] -> %5.2f" % (df.shape, time.time() - start_time)
+
+        finally:
+            os.remove(fn)
+
     def test_big_table_panel(self):
         raise nose.SkipTest('no big table panel')

@@ -748,7 +807,7 @@ def test_big_table_panel(self):
         x = time.time()
         try:
             store = HDFStore(self.scratchpath)
-            store.prof_append('wp', wp)
+            store.append('wp', wp)
             rows = store.root.wp.table.nrows
             recons = store.select('wp')
         finally:
@@ -817,15 +876,35 @@ def test_table_index_incompatible_dtypes(self):

     def test_table_values_dtypes_roundtrip(self):
         df1 = DataFrame({'a': [1, 2, 3]}, dtype='f8')
-        self.store.append('df1', df1)
-        assert df1.dtypes == self.store['df1'].dtypes
+        self.store.append('df_f8', df1)
+        assert df1.dtypes == self.store['df_f8'].dtypes

         df2 = DataFrame({'a': [1, 2, 3]}, dtype='i8')
-        self.store.append('df2', df2)
-        assert df2.dtypes == self.store['df2'].dtypes
+        self.store.append('df_i8', df2)
+        assert df2.dtypes == self.store['df_i8'].dtypes

         # incompatible dtype
-        self.assertRaises(Exception, self.store.append, 'df2', df1)
+        self.assertRaises(Exception, self.store.append, 'df_i8', df1)
+
+        # check creation/storage/retrieval of float32 (a bit hacky to actually create them, though)
+        df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['A'])
+        self.store.append('df_f4', df1)
+        assert df1.dtypes == self.store['df_f4'].dtypes
+        assert df1.dtypes[0] == 'float32'
+
+        # check with mixed dtypes (but not multi float types)
+        df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['float32'])
+        df1['string'] = 'foo'
+        self.store.append('df_mixed_dtypes1', df1)
+        assert (df1.dtypes == self.store['df_mixed_dtypes1'].dtypes).all() == True
+        assert df1.dtypes[0] == 'float32'
+        assert df1.dtypes[1] == 'object'
+
+        ### this is not supported, e.g. mixed float32/float64 blocks ###
+        #df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['float32'])
+        #df1['float64'] = 1.0
+        #self.store.append('df_mixed_dtypes2', df1)
+        #assert (df1.dtypes == self.store['df_mixed_dtypes2'].dtypes).all() == True

     def test_table_mixed_dtypes(self):

@@ -1159,15 +1238,19 @@ def test_tuple_index(self):
         idx = [(0., 1.), (2., 3.), (4., 5.)]
         data = np.random.randn(30).reshape((3, 10))
         DF = DataFrame(data, index=idx, columns=col)
+        warnings.filterwarnings('ignore', category=PerformanceWarning)
         self._check_roundtrip(DF, tm.assert_frame_equal)
+        warnings.filterwarnings('always', category=PerformanceWarning)

     def test_index_types(self):

         values = np.random.randn(2)

         func = lambda l, r: tm.assert_series_equal(l, r, True, True, True)

+        warnings.filterwarnings('ignore', category=PerformanceWarning)
         ser = Series(values, [0, 'y'])
         self._check_roundtrip(ser, func)
+        warnings.filterwarnings('always', category=PerformanceWarning)

         ser = Series(values, [datetime.datetime.today(), 0])
         self._check_roundtrip(ser, func)
@@ -1175,11 +1258,15 @@ def test_index_types(self):
         ser = Series(values, ['y', 0])
         self._check_roundtrip(ser, func)

+        warnings.filterwarnings('ignore', category=PerformanceWarning)
         ser = Series(values, [datetime.date.today(), 'a'])
         self._check_roundtrip(ser, func)
+        warnings.filterwarnings('always', category=PerformanceWarning)

+        warnings.filterwarnings('ignore', category=PerformanceWarning)
         ser = Series(values, [1.23, 'b'])
         self._check_roundtrip(ser, func)
+        warnings.filterwarnings('always', category=PerformanceWarning)

         ser = Series(values, [1, 1.53])
         self._check_roundtrip(ser, func)
@@ -1450,6 +1537,13 @@ def test_select(self):
             expected = df[df.A > 0].reindex(columns=['A', 'B'])
             tm.assert_frame_equal(expected, result)

+            # all data columns
+            self.store.remove('df')
+            self.store.append('df', df, data_columns=True)
+            result = self.store.select('df', ['A > 0'], columns=['A', 'B'])
+            expected = df[df.A > 0].reindex(columns=['A', 'B'])
+            tm.assert_frame_equal(expected, result)
+
             # with a data column, but different columns
             self.store.remove('df')
             self.store.append('df', df, data_columns=['A'])
@@ -1739,6 +1833,16 @@ def _check_roundtrip_table(self, obj, comparator, compression=False):
         store.close()
         os.remove(self.scratchpath)

+    def test_pytables_native_read(self):
+        pth = curpath()
+        store = HDFStore(os.path.join(pth, 'pytables_native.h5'), 'r')
+        d2 = store['detector/readout']
+        store.close()
+
+        store = HDFStore(os.path.join(pth, 'pytables_native2.h5'), 'r')
+        str(store)
+        d1 = store['detector']
+        store.close()
+
     def test_legacy_read(self):
         pth = curpath()
         store = HDFStore(os.path.join(pth, 'legacy.h5'), 'r')
@@ -1760,7 +1864,6 @@ def test_legacy_table_read(self):
         store.select('df2', typ='legacy_frame')

         # old version warning
-        import warnings
         warnings.filterwarnings('ignore', category=IncompatibilityWarning)
         self.assertRaises(
             Exception, store.select, 'wp1', Term('minor_axis', '=', 'B'))
@@ -1812,7 +1915,7 @@ def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs):
                             if a.is_indexed:
                                 self.assert_(new_t[a.name].is_indexed == True)

-        except:
+        except (Exception), detail:
             pass
         finally:
             store.close()
@@ -1899,9 +2002,11 @@ def test_tseries_indices_frame(self):

     def test_unicode_index(self):
         unicode_values = [u'\u03c3', u'\u03c3\u03c3']
-
+        warnings.filterwarnings('ignore', category=PerformanceWarning)
         s = Series(np.random.randn(len(unicode_values)), unicode_values)
         self._check_roundtrip(s, tm.assert_series_equal)
+        warnings.filterwarnings('always', category=PerformanceWarning)
+
     def test_store_datetime_mixed(self):

         df = DataFrame(