diff --git a/doc/source/io.rst b/doc/source/io.rst
index 73a7c2d1e1121..92042b6fe58c5 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -1794,27 +1794,31 @@ similar to how ``read_csv`` and ``to_csv`` work. (new in 0.11.0)
    os.remove('store_tl.h5')
 
-.. _io.hdf5-storer:
+.. _io.hdf5-fixed:
 
-Storer Format
-~~~~~~~~~~~~~
+Fixed Format
+~~~~~~~~~~~~
+
+.. note::
+
+   Prior to 0.13.0 this was known as the ``Storer`` format.
 
 The examples above show storing using ``put``, which write the HDF5 to ``PyTables`` in a fixed array format, called
-the ``storer`` format. These types of stores are are **not** appendable once written (though you can simply
+the ``fixed`` format. These types of stores are **not** appendable once written (though you can simply
 remove them and rewrite). Nor are they **queryable**; they must be
 retrieved in their entirety. These offer very fast writing and slightly faster reading than ``table`` stores.
-This format is specified by default when using ``put`` or by ``fmt='s'``
+This format is specified by default when using ``put`` or ``to_hdf``, or by ``format='fixed'`` or ``format='f'``.
 
 .. warning::
 
-   A ``storer`` format will raise a ``TypeError`` if you try to retrieve using a ``where`` .
+   A ``fixed`` format will raise a ``TypeError`` if you try to retrieve using a ``where``.
 
    .. code-block:: python
 
-      DataFrame(randn(10,2)).to_hdf('test_storer.h5','df')
+      DataFrame(randn(10,2)).to_hdf('test_fixed.h5','df')
 
-      pd.read_hdf('test_storer.h5','df',where='index>5')
-      TypeError: cannot pass a where specification when reading a non-table
+      pd.read_hdf('test_fixed.h5','df',where='index>5')
+      TypeError: cannot pass a where specification when reading a fixed format.
+                 this store must be selected in its entirety
 
@@ -1827,7 +1831,11 @@ Table Format
 format. Conceptually a ``table`` is shaped very much like a DataFrame,
 with rows and columns. A ``table`` may be appended to in the same or
 other sessions.  In addition, delete & query type operations are
-supported. This format is specified by ``fmt='t'`` to ``append`` or ``put``.
+supported. This format is specified by ``format='table'`` or ``format='t'``
+to ``append`` or ``put`` or ``to_hdf``.
+
+This format can also be set as an option, ``pd.set_option('io.hdf.default_format','table')``, to
+enable ``put/append/to_hdf`` to store in the ``table`` format by default.
 
 .. ipython:: python
    :suppress:
@@ -1854,7 +1862,7 @@ supported. This format is specified by ``fmt='t'`` to ``append`` or ``put``.
 
 .. note::
 
-   You can also create a ``table`` by passing ``fmt='t'`` to a ``put`` operation.
+   You can also create a ``table`` by passing ``format='table'`` or ``format='t'`` to a ``put`` operation.
 
 .. _io.hdf5-keys:
 
@@ -2363,7 +2371,7 @@ Starting in 0.11, passing a ``min_itemsize`` dict will cause all passed columns
 External Compatibility
 ~~~~~~~~~~~~~~~~~~~~~~
 
-``HDFStore`` write storer objects in specific formats suitable for
+``HDFStore`` writes ``table`` format objects in specific formats suitable for
 producing loss-less roundtrips to pandas objects. For external
 compatibility, ``HDFStore`` can read native ``PyTables`` format
 tables. It is possible to write an ``HDFStore`` object that can easily
diff --git a/doc/source/release.rst b/doc/source/release.rst
index f492570c9bb0b..9247b9bd44382 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -108,10 +108,11 @@ pandas 0.13
     - removed the ``warn`` argument from ``open``. Instead a ``PossibleDataLossError``
       exception will be raised if you try to use ``mode='w'`` with an OPEN file handle (:issue:`4367`)
     - allow a passed locations array or mask as a ``where`` condition (:issue:`4467`)
-    - the ``fmt`` keyword now replaces the ``table`` keyword; allowed values are ``s|t``
     - add the keyword ``dropna=True`` to ``append`` to change whether ALL nan rows are not written
       to the store (default is ``True``, ALL nan rows are NOT written), also settable
      via the option ``io.hdf.dropna_table`` (:issue:`4625`)
+    - the ``format`` keyword now replaces the ``table`` keyword; allowed values are ``fixed(f)|table(t)``;
+      the ``Storer`` format has been renamed to ``Fixed``
 
   - ``JSON``
 
     - added ``date_unit`` parameter to specify resolution of timestamps. Options
diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt
index f8a565157a04c..5791a50684144 100644
--- a/doc/source/v0.13.0.txt
+++ b/doc/source/v0.13.0.txt
@@ -79,17 +79,17 @@ API changes
   - allow a passed locations array or mask as a ``where`` condition (:issue:`4467`).
     See :ref:`here` for an example.
 
-  - the ``fmt`` keyword now replaces the ``table`` keyword; allowed values are ``s|t``
-    the same defaults as prior < 0.13.0 remain, e.g. ``put`` implies 's' (Storer) format
-    and ``append`` imples 't' (Table) format
+  - the ``format`` keyword now replaces the ``table`` keyword; allowed values are ``fixed(f)`` or ``table(t)``;
+    the same defaults as prior < 0.13.0 remain, e.g. ``put`` implies 'fixed' or 'f' (Fixed) format
+    and ``append`` implies 'table' or 't' (Table) format
 
     .. ipython:: python
 
       path = 'test.h5'
       df = DataFrame(randn(10,2))
-      df.to_hdf(path,'df_table',fmt='t')
+      df.to_hdf(path,'df_table',format='table')
       df.to_hdf(path,'df_table2',append=True)
-      df.to_hdf(path,'df_storer')
+      df.to_hdf(path,'df_fixed')
       with get_store(path) as store:
          print store
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index d15ce05e84d40..47fed4ea911e5 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -678,6 +678,15 @@ def to_hdf(self, path_or_buf, key, **kwargs):
             and if the file does not exist it is created.
             ``'r+'``
             It is similar to ``'a'``, but the file must already exist.
+        format : 'fixed(f)|table(t)', default is 'fixed'
+            fixed(f) : Fixed format
+                       Fast writing/reading. Not-appendable, nor searchable
+            table(t) : Table format
+                       Write as a PyTables Table structure which may perform worse but
+                       allow more flexible operations like searching / selecting subsets
+                       of the data
+        append : boolean, default False
+            For Table formats, append the input data to the existing table
         complevel : int, 1-9, default 0
             If a complib is specified compression will be applied
             where possible
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 33921b7e534e5..aa1c20d582b5b 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -103,19 +103,19 @@ class DuplicateWarning(Warning):
 
 # formats
 _FORMAT_MAP = {
-    u('s') : 's',
-    u('storer') : 's',
-    u('t') : 't',
-    u('table') : 't',
+    u('f') : 'fixed',
+    u('fixed') : 'fixed',
+    u('t') : 'table',
+    u('table') : 'table',
     }
 
-fmt_deprecate_doc = """
+format_deprecate_doc = """
 the table keyword has been deprecated
-use the fmt='s|t' keyword instead
-  s : specifies the Storer format
-      and is the default for put operations
-  t : specifies the Table format
-      and is the default for append operations
+use the format='fixed(f)|table(t)' keyword instead
+  fixed(f) : specifies the Fixed format
+             and is the default for put operations
+  table(t) : specifies the Table format
+             and is the default for append operations
 """
 
 # map object types
@@ -133,16 +133,16 @@ class DuplicateWarning(Warning):
 
 # storer class map
 _STORER_MAP = {
-    u('TimeSeries'): 'LegacySeriesStorer',
-    u('Series'): 'LegacySeriesStorer',
-    u('DataFrame'): 'LegacyFrameStorer',
-    u('DataMatrix'): 'LegacyFrameStorer',
-    u('series'): 'SeriesStorer',
-    u('sparse_series'): 'SparseSeriesStorer',
-    u('frame'): 'FrameStorer',
-    u('sparse_frame'): 'SparseFrameStorer',
-    u('wide'): 'PanelStorer',
-    u('sparse_panel'): 'SparsePanelStorer',
+    u('TimeSeries'): 'LegacySeriesFixed',
+    u('Series'): 'LegacySeriesFixed',
+    u('DataFrame'): 'LegacyFrameFixed',
+    u('DataMatrix'): 'LegacyFrameFixed',
+    u('series'): 'SeriesFixed',
+    u('sparse_series'): 'SparseSeriesFixed',
+    u('frame'): 'FrameFixed',
+    u('sparse_frame'): 'SparseFrameFixed',
+    u('wide'): 'PanelFixed',
+    u('sparse_panel'): 'SparsePanelFixed',
 }
 
 # table class map
@@ -172,10 +172,17 @@ class DuplicateWarning(Warning):
 : boolean
     drop ALL nan rows when appending to a table
 """
 
+format_doc = """
+: format
+    default format for writing, if None, then
+    put will default to 'fixed' and append will default to 'table'
+"""
 
 with config.config_prefix('io.hdf'):
     config.register_option('dropna_table', True, dropna_doc,
                            validator=config.is_bool)
+    config.register_option('default_format', None, format_doc,
+                           validator=config.is_one_of_factory(['fixed','table',None]))
 
 # oh the troubles to reduce import time
 _table_mod = None
@@ -294,11 +301,7 @@ class HDFStore(StringMixin):
 
     """
    dict-like IO interface for storing pandas objects in PyTables
-    format.
-
-    DataFrame and Panel can be stored in Table format, which is slower to
-    read and write but can be searched and manipulated more like an SQL
-    table. See HDFStore.put for more information
+    either Fixed or Table format.
 
     Parameters
     ----------
@@ -670,7 +673,7 @@ def func(_start, _stop):
         return TableIterator(self, func, nrows=nrows, start=start, stop=stop,
                              auto_close=auto_close).get_values()
 
-    def put(self, key, value, fmt=None, append=False, **kwargs):
+    def put(self, key, value, format=None, append=False, **kwargs):
         """
         Store object in HDFStore
 
@@ -678,19 +681,20 @@ def put(self, key, value, fmt=None, append=False, **kwargs):
         ----------
         key : object
         value : {Series, DataFrame, Panel}
-        fmt : 's|t', default is 's' for storer format
-            s : storer format
-                Fast writing/reading. Not-appendable, nor searchable
-            t : table format
-                Write as a PyTables Table structure which may perform worse but
-                allow more flexible operations like searching / selecting subsets
-                of the data
+        format : 'fixed(f)|table(t)', default is 'fixed'
+            fixed(f) : Fixed format
+                       Fast writing/reading. Not-appendable, nor searchable
+            table(t) : Table format
+                       Write as a PyTables Table structure which may perform worse but
+                       allow more flexible operations like searching / selecting subsets
+                       of the data
         append : boolean, default False
-            For table data structures, append the input data to the existing
-            table
+            For Table format, append the input data to the existing table
         encoding : default None, provide an encoding for strings
         """
-        kwargs = self._validate_format(fmt or 's', kwargs)
+        if format is None:
+            format = get_option("io.hdf.default_format") or 'fixed'
+        kwargs = self._validate_format(format, kwargs)
         self._write_to_group(key, value, append=append, **kwargs)
 
     def remove(self, key, where=None, start=None, stop=None):
@@ -742,7 +746,7 @@ def remove(self, key, where=None, start=None, stop=None):
                 'can only remove with where on objects written as tables')
         return s.delete(where=where, start=start, stop=stop)
 
-    def append(self, key, value, fmt=None, append=True, columns=None, dropna=None, **kwargs):
+    def append(self, key, value, format=None, append=True, columns=None, dropna=None, **kwargs):
         """
         Append to Table in file. Node must already exist and be Table format.
@@ -751,11 +755,11 @@ def append(self, key, value, format=None, append=True, columns=None, dropna=None, **kwargs):
         ----------
         key : object
         value : {Series, DataFrame, Panel, Panel4D}
-        fmt : 't', default is 't' for table format
-            t : table format
-                Write as a PyTables Table structure which may perform worse but
-                allow more flexible operations like searching / selecting subsets
-                of the data
+        format : 'table' is the default
+            table(t) : table format
+                       Write as a PyTables Table structure which may perform worse but
+                       allow more flexible operations like searching / selecting subsets
+                       of the data
         append : boolean, default True, append the input data to the existing
         data_columns : list of columns to create as data columns, or True to use all columns
         min_itemsize : dict of columns that specify minimum string sizes
@@ -776,7 +780,9 @@ def append(self, key, value, format=None, append=True, columns=None, dropna=None, **kwargs):
 
         if dropna is None:
             dropna = get_option("io.hdf.dropna_table")
-        kwargs = self._validate_format(fmt or 't', kwargs)
+        if format is None:
+            format = get_option("io.hdf.default_format") or 'table'
+        kwargs = self._validate_format(format, kwargs)
         self._write_to_group(key, value, append=append, dropna=dropna, **kwargs)
 
     def append_to_multiple(self, d, value, selector, data_columns=None, axes=None, **kwargs):
@@ -864,7 +870,7 @@ def create_table_index(self, key, **kwargs):
             return
 
         if not s.is_table:
-            raise TypeError("cannot create table index on a non-table")
+            raise TypeError("cannot create table index on a Fixed format store")
         s.create_index(**kwargs)
 
     def groups(self):
@@ -942,39 +948,36 @@ def _check_if_open(self):
         if not self.is_open:
             raise ClosedFileError("{0} file is not open!".format(self._path))
 
-    def _validate_format(self, fmt, kwargs):
+    def _validate_format(self, format, kwargs):
         """ validate / deprecate formats; return the new kwargs """
         kwargs = kwargs.copy()
 
-        if 'format' in kwargs:
-            raise TypeError("pls specify an object format with the 'fmt' keyword")
-
         # table arg
         table = kwargs.pop('table',None)
 
         if table is not None:
-            warnings.warn(fmt_deprecate_doc,FutureWarning)
+            warnings.warn(format_deprecate_doc,FutureWarning)
 
             if table:
-                fmt = 't'
+                format = 'table'
             else:
-                fmt = 's'
+                format = 'fixed'
 
         # validate
         try:
-            kwargs['fmt'] = _FORMAT_MAP[fmt.lower()]
+            kwargs['format'] = _FORMAT_MAP[format.lower()]
         except:
-            raise TypeError("invalid HDFStore format specified [{0}]".format(fmt))
+            raise TypeError("invalid HDFStore format specified [{0}]".format(format))
 
         return kwargs
 
-    def _create_storer(self, group, fmt=None, value=None, append=False, **kwargs):
-        """ return a suitable Storer class to operate """
+    def _create_storer(self, group, format=None, value=None, append=False, **kwargs):
+        """ return a suitable class to operate """
 
         def error(t):
             raise TypeError(
-                "cannot properly create the storer for: [%s] [group->%s,value->%s,fmt->%s,append->%s,kwargs->%s]" %
-                (t, group, type(value), fmt, append, kwargs))
+                "cannot properly create the storer for: [%s] [group->%s,value->%s,format->%s,append->%s,kwargs->%s]" %
+                (t, group, type(value), format, append, kwargs))
 
         pt = _ensure_decoded(getattr(group._v_attrs, 'pandas_type', None))
         tt = _ensure_decoded(getattr(group._v_attrs, 'table_type', None))
@@ -998,7 +1001,7 @@ def error(t):
             error('_TYPE_MAP')
 
         # we are actually a table
-        if fmt == 't':
+        if format == 'table':
             pt += u('_table')
 
         # a storer node
@@ -1050,7 +1053,7 @@ def error(t):
             error('_TABLE_MAP')
 
     def _write_to_group(
-        self, key, value, fmt, index=True, append=False,
+        self, key, value, format, index=True, append=False,
         complib=None, encoding=None, **kwargs):
         group = self.get_node(key)
 
@@ -1061,7 +1064,7 @@ def _write_to_group(
 
         # we don't want to store a table node at all if are object is 0-len
         # as there are not dtypes
-        if getattr(value,'empty',None) and (fmt == 't' or append):
+        if getattr(value,'empty',None) and (format == 'table' or append):
             return
 
         if group is None:
@@ -1081,12 +1084,12 @@ def _write_to_group(
                 group = self._handle.createGroup(path, p)
             path = new_path
 
-        s = self._create_storer(group, fmt, value, append=append,
+        s = self._create_storer(group, format, value, append=append,
                                 encoding=encoding, **kwargs)
         if append:
-            # raise if we are trying to append to a non-table,
+            # raise if we are trying to append to a Fixed format,
             # or a table that exists (and we are putting)
-            if not s.is_table or (s.is_table and fmt == 's' and s.is_exists):
+            if not s.is_table or (s.is_table and format == 'fixed' and s.is_exists):
                 raise ValueError('Can only append to Tables')
             if not s.is_exists:
                 s.set_object_info()
@@ -1094,7 +1097,7 @@ def _write_to_group(
             s.set_object_info()
 
         if not s.is_table and complib:
-            raise ValueError('Compression not supported on non-table')
+            raise ValueError('Compression not supported on Fixed format stores')
 
         # write the object
         s.write(obj=value, append=append, complib=complib, **kwargs)
@@ -1765,11 +1768,11 @@ def get_attr(self):
         pass
 
 
-class Storer(StringMixin):
+class Fixed(StringMixin):
 
     """ represent an object in my store
-          facilitate read/write of various types of objects
-          this is an abstract base class
+        facilitate read/write of various types of objects
+        this is an abstract base class
 
         Parameters
         ----------
@@ -1921,9 +1924,9 @@ def delete(self, where=None, **kwargs):
         raise TypeError("cannot delete on an abstract storer")
 
 
-class GenericStorer(Storer):
+class GenericFixed(Fixed):
 
-    """ a generified storer version """
+    """ a generified fixed version """
     _index_type_map = {DatetimeIndex: 'datetime',
                        PeriodIndex: 'period'}
     _reverse_index_map = dict([(v, k)
@@ -1950,10 +1953,10 @@ def f(values, freq=None, tz=None):
 
     def validate_read(self, kwargs):
         if kwargs.get('columns') is not None:
-            raise TypeError("cannot pass a column specification when reading a non-table "
+            raise TypeError("cannot pass a column specification when reading a Fixed format store. "
                             "this store must be selected in its entirety")
         if kwargs.get('where') is not None:
-            raise TypeError("cannot pass a where specification when reading from a non-table "
+            raise TypeError("cannot pass a where specification when reading from a Fixed format store. "
                             "this store must be selected in its entirety")
 
     @property
@@ -2212,7 +2215,7 @@ def write_array(self, key, value, items=None):
 
         getattr(self.group, key)._v_attrs.transposed = transposed
 
 
-class LegacyStorer(GenericStorer):
+class LegacyFixed(GenericFixed):
 
     def read_index_legacy(self, key):
         node = getattr(self.group, key)
@@ -2221,7 +2224,7 @@ def read_index_legacy(self, key):
         return _unconvert_index_legacy(data, kind, encoding=self.encoding)
 
 
-class LegacySeriesStorer(LegacyStorer):
+class LegacySeriesFixed(LegacyFixed):
 
     def read(self, **kwargs):
         self.validate_read(kwargs)
@@ -2230,7 +2233,7 @@ def read(self, **kwargs):
         return Series(values, index=index)
 
 
-class LegacyFrameStorer(LegacyStorer):
+class LegacyFrameFixed(LegacyFixed):
 
     def read(self, **kwargs):
         self.validate_read(kwargs)
@@ -2240,7 +2243,7 @@ def read(self, **kwargs):
         return DataFrame(values, index=index, columns=columns)
 
 
-class SeriesStorer(GenericStorer):
+class SeriesFixed(GenericFixed):
     pandas_kind = u('series')
     attributes = ['name']
 
@@ -2262,13 +2265,13 @@ def read(self, **kwargs):
         return Series(values, index=index, name=self.name)
 
     def write(self, obj, **kwargs):
-        super(SeriesStorer, self).write(obj, **kwargs)
+        super(SeriesFixed, self).write(obj, **kwargs)
         self.write_index('index', obj.index)
         self.write_array('values', obj.values)
         self.attrs.name = obj.name
 
 
-class SparseSeriesStorer(GenericStorer):
+class SparseSeriesFixed(GenericFixed):
     pandas_kind = u('sparse_series')
     attributes = ['name', 'fill_value', 'kind']
 
@@ -2282,7 +2285,7 @@ def read(self, **kwargs):
                             name=self.name)
 
     def write(self, obj, **kwargs):
-        super(SparseSeriesStorer, self).write(obj, **kwargs)
+        super(SparseSeriesFixed, self).write(obj, **kwargs)
         self.write_index('index', obj.index)
         self.write_index('sp_index', obj.sp_index)
         self.write_array('sp_values', obj.sp_values)
@@ -2291,7 +2294,7 @@ def write(self, obj, **kwargs):
         self.attrs.kind = obj.kind
 
 
-class SparseFrameStorer(GenericStorer):
+class SparseFrameFixed(GenericFixed):
     pandas_kind = u('sparse_frame')
     attributes = ['default_kind', 'default_fill_value']
 
@@ -2301,7 +2304,7 @@ def read(self, **kwargs):
         sdict = {}
         for c in columns:
             key = 'sparse_series_%s' % c
-            s = SparseSeriesStorer(self.parent, getattr(self.group, key))
+            s = SparseSeriesFixed(self.parent, getattr(self.group, key))
             s.infer_axes()
             sdict[c] = s.read()
         return SparseDataFrame(sdict, columns=columns,
@@ -2310,21 +2313,21 @@ def read(self, **kwargs):
 
     def write(self, obj, **kwargs):
         """ write it as a collection of individual sparse series """
-        super(SparseFrameStorer, self).write(obj, **kwargs)
+        super(SparseFrameFixed, self).write(obj, **kwargs)
         for name, ss in compat.iteritems(obj):
             key = 'sparse_series_%s' % name
             if key not in self.group._v_children:
                 node = self._handle.createGroup(self.group, key)
             else:
                 node = getattr(self.group, key)
-            s = SparseSeriesStorer(self.parent, node)
+            s = SparseSeriesFixed(self.parent, node)
             s.write(ss)
         self.attrs.default_fill_value = obj.default_fill_value
         self.attrs.default_kind = obj.default_kind
         self.write_index('columns', obj.columns)
 
 
-class SparsePanelStorer(GenericStorer):
+class SparsePanelFixed(GenericFixed):
     pandas_kind = u('sparse_panel')
     attributes = ['default_kind', 'default_fill_value']
 
@@ -2336,14 +2339,14 @@ def read(self, **kwargs):
         for name in items:
             key = 'sparse_frame_%s' % name
             node = getattr(self.group, key)
-            s = SparseFrameStorer(self.parent, getattr(self.group, key))
+            s = SparseFrameFixed(self.parent, getattr(self.group, key))
             s.infer_axes()
             sdict[name] = s.read()
         return SparsePanel(sdict, items=items,
                            default_kind=self.default_kind,
                            default_fill_value=self.default_fill_value)
 
     def write(self, obj, **kwargs):
-        super(SparsePanelStorer, self).write(obj, **kwargs)
+        super(SparsePanelFixed, self).write(obj, **kwargs)
         self.attrs.default_fill_value = obj.default_fill_value
         self.attrs.default_kind = obj.default_kind
         self.write_index('items', obj.items)
@@ -2354,11 +2357,11 @@ def write(self, obj, **kwargs):
                 node = self._handle.createGroup(self.group, key)
             else:
                 node = getattr(self.group, key)
-            s = SparseFrameStorer(self.parent, node)
+            s = SparseFrameFixed(self.parent, node)
             s.write(sdf)
 
 
-class BlockManagerStorer(GenericStorer):
+class BlockManagerFixed(GenericFixed):
     attributes = ['ndim', 'nblocks']
     is_shape_reversed = False
 
@@ -2412,7 +2415,7 @@ def read(self, **kwargs):
         return self.obj_type(BlockManager(blocks, axes))
 
     def write(self, obj, **kwargs):
-        super(BlockManagerStorer, self).write(obj, **kwargs)
+        super(BlockManagerFixed, self).write(obj, **kwargs)
         data = obj._data
         if not data.is_consolidated():
             data = data.consolidate()
@@ -2430,22 +2433,22 @@ def write(self, obj, **kwargs):
             self.write_index('block%d_items' % i, blk.items)
 
 
-class FrameStorer(BlockManagerStorer):
+class FrameFixed(BlockManagerFixed):
     pandas_kind = u('frame')
     obj_type = DataFrame
 
 
-class PanelStorer(BlockManagerStorer):
+class PanelFixed(BlockManagerFixed):
     pandas_kind = u('wide')
     obj_type = Panel
     is_shape_reversed = True
 
     def write(self, obj, **kwargs):
         obj._consolidate_inplace()
-        return super(PanelStorer, self).write(obj, **kwargs)
+        return super(PanelFixed, self).write(obj, **kwargs)
 
 
-class Table(Storer):
+class Table(Fixed):
 
     """ represent a table:
           facilitate read/write of various types of tables
@@ -3992,7 +3995,7 @@ def eval(self):
 
         else:
             raise TypeError(
-                "passing a filterable condition to a non-table indexer [%s]" % str(self))
+                "passing a filterable condition to a Fixed format indexer [%s]" % str(self))
 
     def convert_value(self, v):
         """ convert the expression that is in the term to something that is accepted by pytables """
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index ab7e5cf813b24..0ab7a32938646 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -150,38 +150,38 @@ def roundtrip(key, obj,**kwargs):
     def test_api(self):
 
         # GH4584
-        # API issue when to_hdf doesn't acdept append AND table args
+        # API issue when to_hdf doesn't accept append AND format args
         with tm.ensure_clean(self.path) as path:
 
             df = tm.makeDataFrame()
-            df.iloc[:10].to_hdf(path,'df',append=True,table=True)
-            df.iloc[10:].to_hdf(path,'df',append=True,table=True)
+            df.iloc[:10].to_hdf(path,'df',append=True,format='table')
+            df.iloc[10:].to_hdf(path,'df',append=True,format='table')
             assert_frame_equal(read_hdf(path,'df'),df)
 
             # append to False
-            df.iloc[:10].to_hdf(path,'df',append=False,table=True)
-            df.iloc[10:].to_hdf(path,'df',append=True,table=True)
+            df.iloc[:10].to_hdf(path,'df',append=False,format='table')
+            df.iloc[10:].to_hdf(path,'df',append=True,format='table')
             assert_frame_equal(read_hdf(path,'df'),df)
 
         with tm.ensure_clean(self.path) as path:
 
             df = tm.makeDataFrame()
             df.iloc[:10].to_hdf(path,'df',append=True)
-            df.iloc[10:].to_hdf(path,'df',append=True,table='t')
+            df.iloc[10:].to_hdf(path,'df',append=True,format='table')
             assert_frame_equal(read_hdf(path,'df'),df)
 
             # append to False
-            df.iloc[:10].to_hdf(path,'df',append=False,table='t')
+            df.iloc[:10].to_hdf(path,'df',append=False,format='table')
             df.iloc[10:].to_hdf(path,'df',append=True)
             assert_frame_equal(read_hdf(path,'df'),df)
 
         with tm.ensure_clean(self.path) as path:
 
             df = tm.makeDataFrame()
-            df.to_hdf(path,'df',append=False,table=False)
+            df.to_hdf(path,'df',append=False,format='fixed')
             assert_frame_equal(read_hdf(path,'df'),df)
 
-            df.to_hdf(path,'df',append=False,fmt='s')
+            df.to_hdf(path,'df',append=False,format='f')
             assert_frame_equal(read_hdf(path,'df'),df)
 
             df.to_hdf(path,'df',append=False)
@@ -193,34 +193,77 @@ def test_api(self):
 
         with ensure_clean(self.path) as store:
 
             df = tm.makeDataFrame()
 
-            store.append('df',df.iloc[:10],append=True,table=True)
-            store.append('df',df.iloc[10:],append=True,table=True)
+            store.append('df',df.iloc[:10],append=True,format='table')
+            store.append('df',df.iloc[10:],append=True,format='table')
             assert_frame_equal(read_hdf(path,'df'),df)
 
             # append to False
-            store.append('df',df.iloc[:10],append=False,table=True)
-            store.append('df',df.iloc[10:],append=True,table=True)
+            store.append('df',df.iloc[:10],append=False,format='table')
+            store.append('df',df.iloc[10:],append=True,format='table')
             assert_frame_equal(read_hdf(path,'df'),df)
 
             # formats
-            store.append('df',df.iloc[:10],append=False,fmt='t')
-            store.append('df',df.iloc[10:],append=True,fmt='t')
+            store.append('df',df.iloc[:10],append=False,format='table')
+            store.append('df',df.iloc[10:],append=True,format='table')
             assert_frame_equal(read_hdf(path,'df'),df)
 
             _maybe_remove(store,'df')
-            store.append('df',df.iloc[:10],append=False,fmt='t')
-            store.append('df',df.iloc[10:],append=True,fmt=None)
+            store.append('df',df.iloc[:10],append=False,format='table')
+            store.append('df',df.iloc[10:],append=True,format=None)
             assert_frame_equal(read_hdf(path,'df'),df)
 
         with tm.ensure_clean(self.path) as path:
 
            # invalid
            df = tm.makeDataFrame()
-           self.assertRaises(ValueError, df.to_hdf, path,'df',append=True,fmt='s')
+           self.assertRaises(ValueError, df.to_hdf, path,'df',append=True,format='f')
+           self.assertRaises(ValueError, df.to_hdf, path,'df',append=True,format='fixed')
 
-           self.assertRaises(TypeError, df.to_hdf, path,'df',append=True,fmt='foo')
-           self.assertRaises(TypeError, df.to_hdf, path,'df',append=False,fmt='bar')
-           self.assertRaises(TypeError, df.to_hdf, path,'df',format='s')
+           self.assertRaises(TypeError, df.to_hdf, path,'df',append=True,format='foo')
+           self.assertRaises(TypeError, df.to_hdf, path,'df',append=False,format='bar')
+
+
+    def test_api_default_format(self):
+
+        # default_format option
+        with ensure_clean(self.path) as store:
+            df = tm.makeDataFrame()
+
+            pandas.set_option('io.hdf.default_format','fixed')
+            _maybe_remove(store,'df')
+            store.put('df',df)
+            self.assert_(not store.get_storer('df').is_table)
+            self.assertRaises(ValueError, store.append, 'df2',df)
+
+            pandas.set_option('io.hdf.default_format','table')
+            _maybe_remove(store,'df')
+            store.put('df',df)
+            self.assert_(store.get_storer('df').is_table)
+            _maybe_remove(store,'df2')
+            store.append('df2',df)
+            self.assert_(store.get_storer('df2').is_table)
+
+            pandas.set_option('io.hdf.default_format',None)
+
+        with tm.ensure_clean(self.path) as path:
+
+            df = tm.makeDataFrame()
+
+            pandas.set_option('io.hdf.default_format','fixed')
+            df.to_hdf(path,'df')
+            with get_store(path) as store:
+                self.assert_(not store.get_storer('df').is_table)
+            self.assertRaises(ValueError, df.to_hdf, path,'df2', append=True)
+
+            pandas.set_option('io.hdf.default_format','table')
+            df.to_hdf(path,'df3')
+            with get_store(path) as store:
+                self.assert_(store.get_storer('df3').is_table)
+            df.to_hdf(path,'df4',append=True)
+            with get_store(path) as store:
+                self.assert_(store.get_storer('df4').is_table)
+
+            pandas.set_option('io.hdf.default_format',None)
 
     def test_keys(self):
 
@@ -466,7 +509,7 @@ def test_put(self):
         store['foo/bar/bah'] = df[:10]
         store['foo'] = df[:10]
         store['/foo'] = df[:10]
-        store.put('c', df[:10], table=True)
+        store.put('c', df[:10], format='table')
 
         # not OK, not a table
         self.assertRaises(
@@ -481,7 +524,7 @@ def test_put(self):
         self.assertRaises(ValueError, store.put, 'c', df[10:], append=True)
 
         # overwrite table
-        store.put('c', df[:10], table=True, append=False)
+        store.put('c', df[:10], format='table', append=False)
         tm.assert_frame_equal(df[:10], store['c'])
 
     def test_put_string_index(self):
@@ -514,12 +557,12 @@ def test_put_compression(self):
         with ensure_clean(self.path) as store:
             df = tm.makeTimeDataFrame()
 
-            store.put('c', df, table=True, complib='zlib')
+            store.put('c', df, format='table', complib='zlib')
             tm.assert_frame_equal(store['c'], df)
 
-            # can't compress if table=False
+            # can't compress if format='fixed'
             self.assertRaises(ValueError, store.put, 'b', df,
-                              table=False, complib='zlib')
+                              format='fixed', complib='zlib')
 
     def test_put_compression_blosc(self):
         tm.skip_if_no_package('tables', '2.2', app='blosc support')
@@ -527,11 +570,11 @@ def test_put_compression_blosc(self):
 
         with ensure_clean(self.path) as store:
 
-            # can't compress if table=False
+            # can't compress if format='fixed'
             self.assertRaises(ValueError, store.put, 'b', df,
-                              table=False, complib='blosc')
+                              format='fixed', complib='blosc')
 
-            store.put('c', df, table=True, complib='blosc')
+            store.put('c', df, format='table', complib='blosc')
             tm.assert_frame_equal(store['c'], df)
 
     def test_put_integer(self):
@@ -577,7 +620,7 @@ def test_append(self):
             tm.assert_frame_equal(store['df1'], df)
 
             _maybe_remove(store, 'df2')
-            store.put('df2', df[:10], table=True)
+            store.put('df2', df[:10], format='table')
             store.append('df2', df[10:])
             tm.assert_frame_equal(store['df2'], df)
 
@@ -1376,7 +1419,7 @@ def test_append_diff_item_order(self):
         wp2 = wp.ix[['ItemC', 'ItemB', 'ItemA'], 10:, :]
 
         with ensure_clean(self.path) as store:
-            store.put('panel', wp1, table=True)
+            store.put('panel', wp1, format='table')
             self.assertRaises(ValueError, store.put, 'panel', wp2,
                               append=True)
 
@@ -1400,7 +1443,7 @@ def test_append_hierarchical(self):
             tm.assert_frame_equal(result,expected)
 
         with tm.ensure_clean('test.hdf') as path:
-            df.to_hdf(path,'df',table=True)
+            df.to_hdf(path,'df',format='table')
             result = read_hdf(path,'df',columns=['A','B'])
             expected = df.reindex(columns=['A','B'])
             tm.assert_frame_equal(result,expected)
@@ -1541,9 +1584,9 @@ def test_table_index_incompatible_dtypes(self):
                         index=date_range('1/1/2000', periods=3))
 
         with ensure_clean(self.path) as store:
-            store.put('frame', df1, table=True)
+            store.put('frame', df1, format='table')
             self.assertRaises(TypeError, store.put, 'frame', df2,
-                              table=True, append=True)
+                              format='table', append=True)
 
     def test_table_values_dtypes_roundtrip(self):
 
@@ -1777,7 +1820,7 @@ def test_remove_where(self):
             # try to remove non-table (with crit)
             # non-table ok (where = None)
             wp = tm.makePanel()
-            store.put('wp', wp, fmt='t')
+            store.put('wp', wp, format='t')
             store.remove('wp', [('minor_axis', ['A', 'D'])])
             rs = store.select('wp')
             expected = wp.reindex(minor_axis=['B', 'C'])
@@ -1785,7 +1828,7 @@ def test_remove_where(self):
 
             # empty where
             _maybe_remove(store, 'wp')
-            store.put('wp', wp, fmt='t')
+            store.put('wp', wp, format='table')
 
             # deleted number (entire table)
             n = store.remove('wp', [])
@@ -1793,12 +1836,12 @@ def test_remove_where(self):
 
             # non - empty where
             _maybe_remove(store, 'wp')
-            store.put('wp', wp, fmt='t')
+            store.put('wp', wp, format='table')
             self.assertRaises(ValueError, store.remove,
                               'wp', ['foo'])
 
             # selectin non-table with a where
-            # store.put('wp2', wp, fmt='s')
+            # store.put('wp2', wp, format='f')
             # self.assertRaises(ValueError, store.remove,
             #                  'wp2', [('column', ['A', 'D'])])
 
@@ -1811,7 +1854,7 @@ def test_remove_crit(self):
             # group row removal
             date4 = wp.major_axis.take([0, 1, 2, 4, 5, 6, 8, 9, 10])
             crit4 = Term('major_axis', date4)
-            store.put('wp3', wp, fmt='t')
+            store.put('wp3', wp, format='table')
             n = store.remove('wp3', where=[crit4])
             assert(n == 36)
             result = store.select('wp3')
@@ -1819,7 +1862,7 @@ def test_remove_crit(self):
             assert_panel_equal(result, expected)
 
             # upper half
-            store.put('wp', wp, fmt='t')
+            store.put('wp', wp, format='table')
             date = wp.major_axis[len(wp.major_axis) // 2]
 
             crit1 = Term('major_axis', '>', date)
@@ -1836,7 +1879,7 @@ def test_remove_crit(self):
             assert_panel_equal(result, expected)
 
             # individual row elements
-            store.put('wp2', wp, fmt='t')
+            store.put('wp2', wp, format='table')
 
             date1 = wp.major_axis[1:3]
             crit1 = Term('major_axis', date1)
@@ -1862,7 +1905,7 @@ def test_remove_crit(self):
             assert_panel_equal(result, expected)
 
             # corners
-            store.put('wp4', wp, fmt='t')
+            store.put('wp4', wp, format='table')
             n = store.remove(
                 'wp4', where=[Term('major_axis', '>', wp.major_axis[-1])])
             result = store.select('wp4')
@@ -1874,8 +1917,8 @@ def test_terms(self):
             wp = tm.makePanel()
             p4d = tm.makePanel4D()
-            store.put('wp', wp, fmt='t')
-            store.put('p4d', p4d, fmt='t')
+            store.put('wp', wp, format='table')
+            store.put('p4d', p4d, format='table')
 
             # some invalid terms
             terms = [
@@ -2230,8 +2273,8 @@ def test_wide_table(self):
     def test_wide_table_dups(self):
         wp = tm.makePanel()
         with ensure_clean(self.path) as store:
-            store.put('panel', wp, fmt='t')
-            store.put('panel', wp, fmt='t', append=True)
+            store.put('panel', wp, format='table')
+            store.put('panel', wp, format='table', append=True)
 
             with tm.assert_produces_warning(expected_warning=DuplicateWarning):
                 recons = store['panel']
@@ -2297,7 +2340,7 @@ def test_select(self):
 
             # put/select ok
             _maybe_remove(store, 'wp')
-            store.put('wp', wp, fmt='t')
+            store.put('wp', wp, format='table')
             store.select('wp')
 
             # non-table ok (where = None)
@@ -2483,7 +2526,7 @@ def test_select_iterator(self):
         with tm.ensure_clean(self.path) as path:
 
             df = tm.makeTimeDataFrame(500)
-            df.to_hdf(path,'df',fmt='t')
+            df.to_hdf(path,'df',format='table')
 
             results = []
             for x in read_hdf(path,'df',chunksize=100):
@@ -2534,7 +2577,7 @@ def test_retain_index_attributes(self):
         with ensure_clean(self.path) as store:
             _maybe_remove(store,'data')
-            store.put('data', df, fmt='t')
+            store.put('data', df, format='table')
 
             result = store.get('data')
             tm.assert_frame_equal(df,result)
@@ -2592,7 +2635,7 @@ def test_panel_select(self):
         wp = tm.makePanel()
 
         with ensure_clean(self.path) as store:
-            store.put('wp', wp, fmt='t')
+            store.put('wp', wp, format='table')
             date = wp.major_axis[len(wp.major_axis) // 2]
 
             crit1 = ('major_axis', '>=', date)
@@ -2612,7 +2655,7 @@ def test_frame_select(self):
         df = tm.makeTimeDataFrame()
 
         with ensure_clean(self.path) as store:
-            store.put('frame', df,fmt='t')
+            store.put('frame', df, format='table')
             date = df.index[len(df) // 2]
 
             crit1 = ('index', '>=', date)
@@ -2920,7 +2963,7 @@ def test_select_filter_corner(self):
         df.columns = ['%.3d' % c for c in df.columns]
 
         with ensure_clean(self.path) as store:
-            store.put('frame', df, fmt='t')
+            store.put('frame', df, format='table')
 
             crit = Term('columns', df.columns[:75])
             result = store.select('frame', [crit])
@@ -2958,7 +3001,7 @@ def _check_roundtrip_table(self, obj, comparator, compression=False):
             options['complib'] = _default_compressor
 
         with ensure_clean(self.path, 'w', **options) as store:
-            store.put('obj', obj, fmt='t')
+            store.put('obj', obj, format='table')
             retrieved = store['obj']
             # sorted_obj = _test_sort(obj)
             comparator(retrieved, obj)
@@ -2969,7 +3012,7 @@ def test_multiple_open_close(self):
         with tm.ensure_clean(self.path) as path:
 
             df = tm.makeDataFrame()
-            df.to_hdf(path,'df',mode='w',fmt='t')
+            df.to_hdf(path,'df',mode='w',format='table')
 
             # single
             store = HDFStore(path)
@@ -3031,7 +3074,7 @@ def test_multiple_open_close(self):
         with tm.ensure_clean(self.path) as path:
 
             df = tm.makeDataFrame()
-            df.to_hdf(path,'df',mode='w',fmt='t')
+            df.to_hdf(path,'df',mode='w',format='table')
 
             store = HDFStore(path)
             store.close()
@@ -3274,7 +3317,7 @@ def test_store_datetime_mixed(self):
     #                index=[np.arange(5).repeat(2),
     #                       np.tile(np.arange(2), 5)])
 
-    # self.assertRaises(Exception, store.put, 'foo', df, fmt='t')
+    # self.assertRaises(Exception, store.put, 'foo', df, format='table')
 
 
 def _test_sort(obj):
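
A short usage sketch of the API this patch introduces (illustrative only: it assumes pandas 0.13.0 with PyTables installed, and the file name ``example.h5`` is hypothetical; the keywords and the ``io.hdf.default_format`` option are as defined in the hunks above):

.. code-block:: python

   import pandas as pd
   from pandas import DataFrame
   from numpy.random import randn

   df = DataFrame(randn(10, 2))

   # 'fixed' (formerly 'storer'): fast, but neither appendable nor queryable
   df.to_hdf('example.h5', 'df_fixed', format='fixed')

   # 'table': appendable in this or later sessions, queryable with where=
   df.to_hdf('example.h5', 'df_table', format='table')
   df.to_hdf('example.h5', 'df_table', format='table', append=True)

   # make 'table' the default format for put/append/to_hdf
   pd.set_option('io.hdf.default_format', 'table')
   df.to_hdf('example.h5', 'df_default')  # stored as a table

   pd.set_option('io.hdf.default_format', None)  # restore the defaults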
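And a sketch of how the deprecated ``table`` boolean maps onto the new keyword, as a simplified rendering of ``HDFStore._validate_format`` above rather than the pandas source itself; ``resolve_format`` is a hypothetical helper:

.. code-block:: python

   import warnings

   _FORMAT_MAP = {'f': 'fixed', 'fixed': 'fixed', 't': 'table', 'table': 'table'}

   def resolve_format(format=None, table=None, default='fixed'):
       # the legacy ``table`` boolean still works, but now raises a FutureWarning
       if table is not None:
           warnings.warn("the table keyword has been deprecated; use the "
                         "format='fixed(f)|table(t)' keyword instead",
                         FutureWarning)
           format = 'table' if table else 'fixed'
       if format is None:
           format = default  # 'fixed' for put, 'table' for append
       try:
           return _FORMAT_MAP[format.lower()]
       except KeyError:
           raise TypeError("invalid HDFStore format specified [%s]" % format)

   resolve_format(table=True)             # -> 'table' (with a FutureWarning)
   resolve_format('f')                    # -> 'fixed'
   resolve_format(None, default='table')  # -> 'table'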