diff --git a/RELEASE.rst b/RELEASE.rst
index 981fa5bed257d..021b3e64e12f8 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -22,6 +22,12 @@ Where to get it
* Binary installers on PyPI: http://pypi.python.org/pypi/pandas
* Documentation: http://pandas.pydata.org
+ ``HDFStore``
+
+ - Fix a PyTables error raised when using too many selectors in a ``where`` clause
+ - Provide dotted attribute access to ``get`` from stores (e.g. store.df == store['df'])
+ - Internally, change all variables to be private-like (they now have a leading underscore)
+
pandas 0.10.1
=============
diff --git a/doc/source/io.rst b/doc/source/io.rst
index 2b0145dba5f24..a2f30dc14e29f 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -975,8 +975,8 @@ one can use the ExcelWriter class, as in the following example:
HDF5 (PyTables)
---------------
-``HDFStore`` is a dict-like object which reads and writes pandas to the high
-performance HDF5 format using the excellent `PyTables
+``HDFStore`` is a dict-like object which reads and writes pandas objects
+to the high performance HDF5 format using the excellent `PyTables
`__ library.
.. ipython:: python
@@ -990,7 +990,8 @@ performance HDF5 format using the excellent `PyTables
store = HDFStore('store.h5')
print store
-Objects can be written to the file just like adding key-value pairs to a dict:
+Objects can be written to the file just like adding key-value pairs to a
+dict:
.. ipython:: python
@@ -1021,6 +1022,9 @@ In a current or later Python session, you can retrieve stored objects:
# store.get('df') is an equivalent method
store['df']
+ # dotted (attribute) access provides get as well
+ store.df
+
Deletion of the object specified by the key
.. ipython:: python
@@ -1037,7 +1041,8 @@ Closing a Store
# closing a store
store.close()
- # Working with, and automatically closing the store with the context manager.
+ # Working with, and automatically closing the store with the context
+ # manager
with get_store('store.h5') as store:
store.keys()
@@ -1049,15 +1054,19 @@ Closing a Store
os.remove('store.h5')
-These stores are **not** appendable once written (though you can simply remove them and rewrite). Nor are they **queryable**; they must be retrieved in their entirety.
+These stores are **not** appendable once written (though you can simply
+remove them and rewrite). Nor are they **queryable**; they must be
+retrieved in their entirety.
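+
+For example, to "replace" a stored object, simply remove it and write it
+again (a minimal sketch, assuming ``df`` holds the new data)::
+
+    store.remove('df')    # or: del store['df']
+    store['df'] = df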
Storing in Table format
~~~~~~~~~~~~~~~~~~~~~~~
-``HDFStore`` supports another ``PyTables`` format on disk, the ``table`` format. Conceptually a ``table`` is shaped
-very much like a DataFrame, with rows and columns. A ``table`` may be appended to in the same or other sessions.
-In addition, delete & query type operations are supported.
+``HDFStore`` supports another ``PyTables`` format on disk, the ``table``
+format. Conceptually a ``table`` is shaped very much like a DataFrame,
+with rows and columns. A ``table`` may be appended to in the same or
+other sessions. In addition, delete & query type operations are
+supported.
.. ipython:: python
:suppress:
@@ -1085,7 +1094,12 @@ In addition, delete & query type operations are supported.
Hierarchical Keys
~~~~~~~~~~~~~~~~~
-Keys to a store can be specified as a string. These can be in a hierarchical path-name like format (e.g. ``foo/bar/bah``), which will generate a hierarchy of sub-stores (or ``Groups`` in PyTables parlance). Keys can be specified with out the leading '/' and are ALWAYS absolute (e.g. 'foo' refers to '/foo'). Removal operations can remove everying in the sub-store and BELOW, so be *careful*.
+Keys to a store can be specified as a string. These can be in a
+hierarchical path-name-like format (e.g. ``foo/bar/bah``), which will
+generate a hierarchy of sub-stores (or ``Groups`` in PyTables
+parlance). Keys can be specified without the leading '/' and are ALWAYS
+absolute (e.g. 'foo' refers to '/foo'). Removal operations can remove
+everything in the sub-store and BELOW, so be *careful*.
.. ipython:: python
@@ -1104,8 +1118,16 @@ Keys to a store can be specified as a string. These can be in a hierarchical pat
Storing Mixed Types in a Table
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Storing mixed-dtype data is supported. Strings are store as a fixed-width using the maximum size of the appended column. Subsequent appends will truncate strings at this length.
-Passing ``min_itemsize = { `values` : size }`` as a parameter to append will set a larger minimum for the string columns. Storing ``floats, strings, ints, bools, datetime64`` are currently supported. For string columns, passing ``nan_rep = 'nan'`` to append will change the default nan representation on disk (which converts to/from `np.nan`), this defaults to `nan`.
+Storing mixed-dtype data is supported. Strings are stored as
+fixed-width, using the maximum size of the appended column. Subsequent
+appends will truncate strings at this length.
+
+Passing ``min_itemsize={'values': size}`` as a parameter to append
+will set a larger minimum for the string columns. Storing ``floats,
+strings, ints, bools, datetime64`` is currently supported. For string
+columns, passing ``nan_rep='nan'`` to append will change the default
+nan representation on disk (which converts to/from ``np.nan``); this
+defaults to ``nan``.
.. ipython:: python
@@ -1114,9 +1136,9 @@ Passing ``min_itemsize = { `values` : size }`` as a parameter to append will set
df_mixed['int'] = 1
df_mixed['bool'] = True
df_mixed['datetime64'] = Timestamp('20010102')
- df_mixed.ix[3:5,['A','B','string','datetime64']] = np.nan
+ df_mixed.ix[3:5,['A', 'B', 'string', 'datetime64']] = np.nan
- store.append('df_mixed', df_mixed, min_itemsize = { 'values' : 50 })
+ store.append('df_mixed', df_mixed, min_itemsize={'values': 50})
df_mixed1 = store.select('df_mixed')
df_mixed1
df_mixed1.get_dtype_counts()
@@ -1127,7 +1149,8 @@ Passing ``min_itemsize = { `values` : size }`` as a parameter to append will set
Storing Multi-Index DataFrames
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Storing multi-index dataframes as tables is very similar to storing/selecting from homogenous index DataFrames.
+Storing multi-index dataframes as tables is very similar to
+storing/selecting from homogeneous index DataFrames.
.. ipython:: python
@@ -1137,52 +1160,66 @@ Storing multi-index dataframes as tables is very similar to storing/selecting fr
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=['foo', 'bar'])
df_mi = DataFrame(np.random.randn(10, 3), index=index,
- columns=['A', 'B', 'C'])
+ columns=['A', 'B', 'C'])
df_mi
store.append('df_mi',df_mi)
store.select('df_mi')
- # the levels are automatically included as data columns
+ # the levels are automatically included as data columns
store.select('df_mi', Term('foo=bar'))
Querying a Table
~~~~~~~~~~~~~~~~
-``select`` and ``delete`` operations have an optional criteria that can be specified to select/delete only
-a subset of the data. This allows one to have a very large on-disk table and retrieve only a portion of the data.
+
+``select`` and ``delete`` operations have an optional criterion that can
+be specified to select/delete only a subset of the data. This allows one
+to have a very large on-disk table and retrieve only a portion of the
+data.
A query is specified using the ``Term`` class under the hood.
- 'index' and 'columns' are supported indexers of a DataFrame
- - 'major_axis', 'minor_axis', and 'items' are supported indexers of the Panel
+ - 'major_axis', 'minor_axis', and 'items' are supported indexers of
+ the Panel
-Valid terms can be created from ``dict, list, tuple, or string``. Objects can be embeded as values. Allowed operations are: ``<, <=, >, >=, =``. ``=`` will be inferred as an implicit set operation (e.g. if 2 or more values are provided). The following are all valid terms.
+Valid terms can be created from ``dict, list, tuple, or
+string``. Objects can be embedded as values. Allowed operations are: ``<,
+<=, >, >=, =``. ``=`` will be inferred as an implicit set operation
+(e.g. if 2 or more values are provided). The following are all valid
+terms.
- ``dict(field = 'index', op = '>', value = '20121114')``
- ``('index', '>', '20121114')``
- ``'index > 20121114'``
- - ``('index', '>', datetime(2012,11,14))``
- - ``('index', ['20121114','20121115'])``
+ - ``('index', '>', datetime(2012, 11, 14))``
+ - ``('index', ['20121114', '20121115'])``
- ``('major_axis', '=', Timestamp('2012/11/14'))``
- - ``('minor_axis', ['A','B'])``
+ - ``('minor_axis', ['A', 'B'])``
-Queries are built up using a list of ``Terms`` (currently only **anding** of terms is supported). An example query for a panel might be specified as follows.
-``['major_axis>20000102', ('minor_axis', '=', ['A','B']) ]``. This is roughly translated to: `major_axis must be greater than the date 20000102 and the minor_axis must be A or B`
+Queries are built up using a list of ``Terms`` (currently only
+**anding** of terms is supported). An example query for a panel might be
+specified as follows. ``['major_axis>20000102', ('minor_axis', '=',
+['A', 'B']) ]``. This is roughly translated to: `major_axis must be
+greater than the date 20000102 and the minor_axis must be A or B`
.. ipython:: python
store.append('wp',wp)
store
- store.select('wp',[ Term('major_axis>20000102'), Term('minor_axis', '=', ['A','B']) ])
+ store.select('wp', [Term('major_axis>20000102'), Term('minor_axis', '=', ['A', 'B'])])
-The ``columns`` keyword can be supplied to select to filter a list of the return columns, this is equivalent to passing a ``Term('columns',list_of_columns_to_filter)``
+The ``columns`` keyword can be supplied to ``select`` to filter a list of
+the return columns; this is equivalent to passing a
+``Term('columns', list_of_columns_to_filter)``
.. ipython:: python
- store.select('df', columns = ['A','B'])
+ store.select('df', columns=['A', 'B'])
-Start and Stop parameters can be specified to limit the total search space. These are in terms of the total number of rows in a table.
+``start`` and ``stop`` parameters can be specified to limit the total search
+space. These are in terms of the total number of rows in a table.
.. ipython:: python
@@ -1190,12 +1227,22 @@ Start and Stop parameters can be specified to limit the total search space. Thes
wp.to_frame()
# limiting the search
- store.select('wp',[ Term('major_axis>20000102'), Term('minor_axis', '=', ['A','B']) ], start=0, stop=10)
+ store.select('wp', [Term('major_axis>20000102'),
+ Term('minor_axis', '=', ['A', 'B'])],
+ start=0, stop=10)
Indexing
~~~~~~~~
-You can create/modify an index for a table with ``create_table_index`` after data is already in the table (after and ``append/put`` operation). Creating a table index is **highly** encouraged. This will speed your queries a great deal when you use a ``select`` with the indexed dimension as the ``where``. **Indexes are automagically created (starting 0.10.1)** on the indexables and any data columns you specify. This behavior can be turned off by passing ``index=False`` to ``append``.
+
+You can create/modify an index for a table with ``create_table_index``
+after data is already in the table (after an ``append/put``
+operation). Creating a table index is **highly** encouraged. This will
+speed your queries a great deal when you use a ``select`` with the
+indexed dimension as the ``where``. **Indexes are automagically created
+(starting 0.10.1)** on the indexables and any data columns you
+specify. This behavior can be turned off by passing ``index=False`` to
+``append``.
.. ipython:: python
@@ -1204,14 +1251,20 @@ You can create/modify an index for a table with ``create_table_index`` after dat
i.optlevel, i.kind
# change an index by passing new parameters
- store.create_table_index('df', optlevel = 9, kind = 'full')
+ store.create_table_index('df', optlevel=9, kind='full')
i = store.root.df.table.cols.index.index
i.optlevel, i.kind
Query via Data Columns
~~~~~~~~~~~~~~~~~~~~~~
-You can designate (and index) certain columns that you want to be able to perform queries (other than the `indexable` columns, which you can always query). For instance say you want to perform this common operation, on-disk, and return just the frame that matches this query. You can specify ``data_columns = True`` to force all columns to be data_columns
+
+You can designate (and index) certain columns that you want to be able
+to perform queries on (other than the `indexable` columns, which you can
+always query). For instance, say you want to perform this common
+operation, on-disk, and return just the frame that matches this
+query. You can specify ``data_columns=True`` to force all columns to
+be data_columns.
.. ipython:: python
@@ -1223,81 +1276,119 @@ You can designate (and index) certain columns that you want to be able to perfor
df_dc
# on-disk operations
- store.append('df_dc', df_dc, data_columns = ['B','C','string','string2'])
- store.select('df_dc',[ Term('B>0') ])
+ store.append('df_dc', df_dc, data_columns=['B', 'C', 'string', 'string2'])
+ store.select('df_dc', [Term('B>0')])
# getting creative
- store.select('df_dc',[ 'B > 0', 'C > 0', 'string == foo' ])
+ store.select('df_dc', ['B > 0', 'C > 0', 'string == foo'])
# this is in-memory version of this type of selection
df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == 'foo')]
- # we have automagically created this index and that the B/C/string/string2 columns are stored separately as ``PyTables`` columns
+ # we have automagically created this index; note that the B/C/string/string2
+ # columns are stored separately as ``PyTables`` columns
store.root.df_dc.table
-There is some performance degredation by making lots of columns into `data columns`, so it is up to the user to designate these. In addition, you cannot change data columns (nor indexables) after the first append/put operation (Of course you can simply read in the data and create a new table!)
+There is some performance degradation by making lots of columns into
+`data columns`, so it is up to the user to designate these. In addition,
+you cannot change data columns (nor indexables) after the first
+append/put operation (Of course you can simply read in the data and
+create a new table!)
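+
+A minimal sketch of that read-and-recreate step (the column list here is
+illustrative only)::
+
+    df = store.select('df_dc')
+    store.remove('df_dc')
+    store.append('df_dc', df, data_columns=['B', 'C'])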
Advanced Queries
~~~~~~~~~~~~~~~~
**Unique**
-To retrieve the *unique* values of an indexable or data column, use the method ``unique``. This will, for example, enable you to get the index very quickly. Note ``nan`` are excluded from the result set.
+To retrieve the *unique* values of an indexable or data column, use the
+method ``unique``. This will, for example, enable you to get the index
+very quickly. Note that ``nan`` values are excluded from the result set.
.. ipython:: python
- store.unique('df_dc','index')
- store.unique('df_dc','string')
+ store.unique('df_dc', 'index')
+ store.unique('df_dc', 'string')
**Replicating or**
-``not`` and ``or`` conditions are unsupported at this time; however, ``or`` operations are easy to replicate, by repeately applying the criteria to the table, and then ``concat`` the results.
+``not`` and ``or`` conditions are unsupported at this time; however,
+``or`` operations are easy to replicate, by repeatedly applying the
+criteria to the table, and then ``concat`` the results.
.. ipython:: python
crit1 = [ Term('B>0'), Term('C>0'), Term('string=foo') ]
crit2 = [ Term('B<0'), Term('C>0'), Term('string=foo') ]
- concat([ store.select('df_dc',c) for c in [ crit1, crit2 ] ])
+ concat([store.select('df_dc', c) for c in [crit1, crit2]])
**Storer Object**
-If you want to inspect the stored object, retrieve via ``get_storer``. You could use this progamatically to say get the number of rows in an object.
+If you want to inspect the stored object, retrieve it via
+``get_storer``. You could use this programmatically to, say, get the
+number of rows in an object.
.. ipython:: python
store.get_storer('df_dc').nrows
+
Multiple Table Queries
~~~~~~~~~~~~~~~~~~~~~~
-New in 0.10.1 are the methods ``append_to_multple`` and ``select_as_multiple``, that can perform appending/selecting from multiple tables at once. The idea is to have one table (call it the selector table) that you index most/all of the columns, and perform your queries. The other table(s) are data tables that are indexed the same the selector table. You can then perform a very fast query on the selector table, yet get lots of data back. This method works similar to having a very wide-table, but is more efficient in terms of queries.
+New in 0.10.1 are the methods ``append_to_multiple`` and
+``select_as_multiple``, which can perform appending/selecting from
+multiple tables at once. The idea is to have one table (call it the
+selector table) in which you index most/all of the columns, and perform
+your queries. The other table(s) are data tables that are indexed the
+same as the selector table. You can then perform a very fast query on
+the selector table, yet get lots of data back. This method works
+similarly to having a very wide table, but is more efficient in terms
+of queries.
-Note, **THE USER IS RESPONSIBLE FOR SYNCHRONIZING THE TABLES**. This means, append to the tables in the same order; ``append_to_multiple`` splits a single object to multiple tables, given a specification (as a dictionary). This dictionary is a mapping of the table names to the 'columns' you want included in that table. Pass a `None` for a single table (optional) to let it have the remaining columns. The argument ``selector`` defines which table is the selector table.
+Note, **THE USER IS RESPONSIBLE FOR SYNCHRONIZING THE TABLES**. This
+means, append to the tables in the same order; ``append_to_multiple``
+splits a single object into multiple tables, given a specification (as a
+dictionary). This dictionary is a mapping of the table names to the
+'columns' you want included in that table. Pass a `None` for a single
+table (optional) to let it have the remaining columns. The argument
+``selector`` defines which table is the selector table.
.. ipython:: python
- df_mt = DataFrame(randn(8, 6), index=date_range('1/1/2000', periods=8),
+ df_mt = DataFrame(randn(8, 6), index=date_range('1/1/2000', periods=8),
columns=['A', 'B', 'C', 'D', 'E', 'F'])
df_mt['foo'] = 'bar'
# you can also create the tables individually
- store.append_to_multiple({ 'df1_mt' : ['A','B'], 'df2_mt' : None }, df_mt, selector = 'df1_mt')
+ store.append_to_multiple({'df1_mt': ['A', 'B'], 'df2_mt': None},
+ df_mt, selector='df1_mt')
store
# indiviual tables were created
store.select('df1_mt')
store.select('df2_mt')
-
+
# as a multiple
- store.select_as_multiple(['df1_mt','df2_mt'], where = [ 'A>0','B>0' ], selector = 'df1_mt')
-
+ store.select_as_multiple(['df1_mt', 'df2_mt'], where=['A>0', 'B>0'],
+ selector='df1_mt')
+
Delete from a Table
~~~~~~~~~~~~~~~~~~~
-You can delete from a table selectively by specifying a ``where``. In deleting rows, it is important to understand the ``PyTables`` deletes rows by erasing the rows, then **moving** the following data. Thus deleting can potentially be a very expensive operation depending on the orientation of your data. This is especially true in higher dimensional objects (``Panel`` and ``Panel4D``). To get optimal deletion speed, it pays to have the dimension you are deleting be the first of the ``indexables``.
-Data is ordered (on the disk) in terms of the ``indexables``. Here's a simple use case. You store panel type data, with dates in the ``major_axis`` and ids in the ``minor_axis``. The data is then interleaved like this:
+You can delete from a table selectively by specifying a ``where``. In
+deleting rows, it is important to understand that ``PyTables`` deletes
+rows by erasing the rows, then **moving** the following data. Thus
+deleting can potentially be a very expensive operation depending on the
+orientation of your data. This is especially true in higher dimensional
+objects (``Panel`` and ``Panel4D``). To get optimal deletion speed, it
+pays to have the dimension you are deleting be the first of the
+``indexables``.
+
+Data is ordered (on the disk) in terms of the ``indexables``. Here's a
+simple use case. You store panel-type data, with dates in the
+``major_axis`` and ids in the ``minor_axis``. The data is then
+interleaved like this:
- date_1
- id_1
@@ -1309,7 +1400,11 @@ Data is ordered (on the disk) in terms of the ``indexables``. Here's a simple us
- .
- id_n
-It should be clear that a delete operation on the ``major_axis`` will be fairly quick, as one chunk is removed, then the following data moved. On the other hand a delete operation on the ``minor_axis`` will be very expensive. In this case it would almost certainly be faster to rewrite the table using a ``where`` that selects all but the missing data.
+It should be clear that a delete operation on the ``major_axis`` will be
+fairly quick, as one chunk is removed, then the following data moved. On
+the other hand a delete operation on the ``minor_axis`` will be very
+expensive. In this case it would almost certainly be faster to rewrite
+the table using a ``where`` that selects all but the missing data.
.. ipython:: python
@@ -1317,66 +1412,144 @@ It should be clear that a delete operation on the ``major_axis`` will be fairly
store.remove('wp', 'major_axis>20000102' )
store.select('wp')
-Please note that HDF5 **DOES NOT RECLAIM SPACE** in the h5 files automatically. Thus, repeatedly deleting (or removing nodes) and adding again **WILL TEND TO INCREASE THE FILE SIZE**. To *clean* the file, use ``ptrepack`` (see below).
+Please note that HDF5 **DOES NOT RECLAIM SPACE** in the h5 files
+automatically. Thus, repeatedly deleting (or removing nodes) and adding
+again **WILL TEND TO INCREASE THE FILE SIZE**. To *clean* the file, use
+``ptrepack`` (see below).
Compression
~~~~~~~~~~~
-``PyTables`` allows the stored data to be compressed. Tthis applies to all kinds of stores, not just tables.
- - Pass ``complevel=int`` for a compression level (1-9, with 0 being no compression, and the default)
- - Pass ``complib=lib`` where lib is any of ``zlib, bzip2, lzo, blosc`` for whichever compression library you prefer.
+``PyTables`` allows the stored data to be compressed. This applies to
+all kinds of stores, not just tables.
+
+ - Pass ``complevel=int`` for a compression level (1-9, with 0 being no
+ compression, and the default)
+ - Pass ``complib=lib`` where lib is any of ``zlib, bzip2, lzo, blosc`` for
+ whichever compression library you prefer.
-``HDFStore`` will use the file based compression scheme if no overriding ``complib`` or ``complevel`` options are provided. ``blosc`` offers very fast compression, and is my most used. Note that ``lzo`` and ``bzip2`` may not be installed (by Python) by default.
+``HDFStore`` will use the file-based compression scheme if no overriding
+``complib`` or ``complevel`` options are provided. ``blosc`` offers very
+fast compression, and is the most frequently used. Note that ``lzo`` and
+``bzip2`` may not be installed (by Python) by default.
Compression for all objects within the file
- - ``store_compressed = HDFStore('store_compressed.h5', complevel=9, complib='blosc')``
+ - ``store_compressed = HDFStore('store_compressed.h5', complevel=9,
+ complib='blosc')``
-Or on-the-fly compression (this only applies to tables). You can turn off file compression for a specific table by passing ``complevel=0``
+Or on-the-fly compression (this only applies to tables). You can turn
+off file compression for a specific table by passing ``complevel=0``
- ``store.append('df', df, complib='zlib', complevel=5)``
**ptrepack**
-``PyTables`` offer better write performance when compressed after writing them, as opposed to turning on compression at the very beginning. You can use the supplied ``PyTables`` utility ``ptrepack``. In addition, ``ptrepack`` can change compression levels after the fact.
+``PyTables`` offers better write performance when tables are compressed
+after they are written, as opposed to turning on compression at the very
+beginning. You can use the supplied ``PyTables`` utility
+``ptrepack``. In addition, ``ptrepack`` can change compression levels
+after the fact.
- - ``ptrepack --chunkshape=auto --propindexes --complevel=9 --complib=blosc in.h5 out.h5``
+ - ``ptrepack --chunkshape=auto --propindexes --complevel=9
+ --complib=blosc in.h5 out.h5``
-Furthermore ``ptrepack in.h5 out.h5`` will *repack* the file to allow you to reuse previously deleted space. Aalternatively, one can simply remove the file and write again, or use the ``copy`` method.
+Furthermore ``ptrepack in.h5 out.h5`` will *repack* the file to allow
+you to reuse previously deleted space. Alternatively, one can simply
+remove the file and write again, or use the ``copy`` method.
Notes & Caveats
~~~~~~~~~~~~~~~
- - Once a ``table`` is created its items (Panel) / columns (DataFrame) are fixed; only exactly the same columns can be appended
- - You can not append/select/delete to a non-table (table creation is determined on the first append, or by passing ``table=True`` in a put operation)
- - ``HDFStore`` is **not-threadsafe for writing**. The underlying ``PyTables`` only supports concurrent reads (via threading or processes). If you need reading and writing *at the same time*, you need to serialize these operations in a single thread in a single process. You will corrupt your data otherwise. See the issue for more information.
-
- - ``PyTables`` only supports fixed-width string columns in ``tables``. The sizes of a string based indexing column (e.g. *columns* or *minor_axis*) are determined as the maximum size of the elements in that axis or by passing the parameter ``min_itemsize`` on the first table creation (``min_itemsize`` can be an integer or a dict of column name to an integer). If subsequent appends introduce elements in the indexing axis that are larger than the supported indexer, an Exception will be raised (otherwise you could have a silent truncation of these indexers, leading to loss of information). Just to be clear, this fixed-width restriction applies to **indexables** (the indexing columns) and **string values** in a mixed_type table.
+ - Once a ``table`` is created its items (Panel) / columns (DataFrame)
+ are fixed; only exactly the same columns can be appended
+ - You cannot append/select/delete to a non-table (table creation is
+ determined on the first append, or by passing ``table=True`` in a
+ put operation)
+ - ``HDFStore`` is **not thread-safe for writing**. The underlying
+ ``PyTables`` only supports concurrent reads (via threading or
+ processes). If you need reading and writing *at the same time*, you
+ need to serialize these operations in a single thread in a single
+ process. You will corrupt your data otherwise. See the issue
+ for more information.
+ - ``PyTables`` only supports fixed-width string columns in
+ ``tables``. The sizes of a string-based indexing column
+ (e.g. *columns* or *minor_axis*) are determined as the maximum size
+ of the elements in that axis or by passing the parameter
+ ``min_itemsize`` on the first table creation (``min_itemsize`` can
+ be an integer or a dict of column name to an integer). If
+ subsequent appends introduce elements in the indexing axis that are
+ larger than the supported indexer, an Exception will be raised
+ (otherwise you could have a silent truncation of these indexers,
+ leading to loss of information). Just to be clear, this fixed-width
+ restriction applies to **indexables** (the indexing columns) and
+ **string values** in a mixed_type table.
.. ipython:: python
- store.append('wp_big_strings', wp, min_itemsize = { 'minor_axis' : 30 })
- wp = wp.rename_axis(lambda x: x + '_big_strings', axis=2)
- store.append('wp_big_strings', wp)
- store.select('wp_big_strings')
-
- # we have provided a minimum minor_axis indexable size
- store.root.wp_big_strings.table
+ store.append('wp_big_strings', wp, min_itemsize={'minor_axis': 30})
+ wp = wp.rename_axis(lambda x: x + '_big_strings', axis=2)
+ store.append('wp_big_strings', wp)
+ store.select('wp_big_strings')
+
+ # we have provided a minimum minor_axis indexable size
+ store.root.wp_big_strings.table
+
+DataTypes
+~~~~~~~~~
+
+``HDFStore`` will map an object dtype to the ``PyTables`` underlying
+dtype. This means the following types are known to work:
+
+ - floating : ``float64, float32, float16`` *(using* ``np.nan`` *to
+ represent invalid values)*
+ - integer : ``int64, int32, int8, uint64, uint32, uint8``
+ - bool
+ - datetime64[ns] *(using* ``NaT`` *to represent invalid values)*
+ - object : ``strings`` *(using* ``np.nan`` *to represent invalid
+ values)*
+
+Currently, ``unicode`` and ``datetime`` columns (represented with a
+dtype of ``object``) **WILL FAIL**. In addition, even though a column
+may look like a ``datetime64[ns]``, if it contains ``np.nan``, this
+**WILL FAIL**. You can try to convert datetimelike columns to proper
+``datetime64[ns]`` columns that possibly contain ``NaT`` to represent
+invalid values. (Some of these issues have been addressed and these
+conversions may not be necessary in future versions of pandas.)
+
+ .. ipython:: python
+
+ import datetime
+ df = DataFrame(dict(datelike=Series([datetime.datetime(2001, 1, 1),
+ datetime.datetime(2001, 1, 2), np.nan])))
+ df
+ df.dtypes
+
+ # to convert
+ df['datelike'] = Series(df['datelike'].values, dtype='M8[ns]')
+ df
+ df.dtypes
External Compatibility
~~~~~~~~~~~~~~~~~~~~~~
-``HDFStore`` write storer objects in specific formats suitable for producing loss-less roundtrips to pandas objects. For external compatibility, ``HDFStore`` can read native ``PyTables`` format tables. It is possible to write an ``HDFStore`` object that can easily be imported into ``R`` using the ``rhdf5`` library. Create a table format store like this:
+``HDFStore`` writes storer objects in specific formats suitable for
+producing loss-less roundtrips to pandas objects. For external
+compatibility, ``HDFStore`` can read native ``PyTables`` format
+tables. It is possible to write an ``HDFStore`` object that can easily
+be imported into ``R`` using the ``rhdf5`` library. Create a table
+format store like this:
.. ipython:: python
store_export = HDFStore('export.h5')
- store_export.append('df_dc',df_dc,data_columns=df_dc.columns)
- store_export
+ store_export.append('df_dc', df_dc, data_columns=df_dc.columns)
+ store_export
.. ipython:: python
:suppress:
-
+
store_export.close()
import os
os.remove('export.h5')
@@ -1384,12 +1557,19 @@ External Compatibility
Backwards Compatibility
~~~~~~~~~~~~~~~~~~~~~~~
-0.10.1 of ``HDFStore`` is backwards compatible for reading tables created in a prior version of pandas however, query terms using the prior (undocumented) methodology are unsupported. ``HDFStore`` will issue a warning if you try to use a prior-version format file. You must read in the entire file and write it out using the new format, using the method ``copy`` to take advantage of the updates. The group attribute ``pandas_version`` contains the version information. ``copy`` takes a number of options, please see the docstring.
+0.10.1 of ``HDFStore`` is backwards compatible for reading tables
+created in a prior version of pandas; however, query terms using the
+prior (undocumented) methodology are unsupported. ``HDFStore`` will
+issue a warning if you try to use a prior-version format file. You must
+read in the entire file and write it out using the new format, using the
+method ``copy`` to take advantage of the updates. The group attribute
+``pandas_version`` contains the version information. ``copy`` takes a
+number of options; please see the docstring.
.. ipython:: python
:suppress:
-
+
import os
legacy_file_path = os.path.abspath('source/_static/legacy_0.10.h5')
@@ -1400,27 +1580,40 @@ Backwards Compatibility
legacy_store
# copy (and return the new handle)
- new_store = legacy_store.copy('store_new.h5')
- new_store
+ new_store = legacy_store.copy('store_new.h5')
+ new_store
new_store.close()
.. ipython:: python
:suppress:
-
+
legacy_store.close()
import os
os.remove('store_new.h5')
-
+
Performance
~~~~~~~~~~~
- - ``Tables`` come with a writing performance penalty as compared to regular stores. The benefit is the ability to append/delete and query (potentially very large amounts of data).
- Write times are generally longer as compared with regular stores. Query times can be quite fast, especially on an indexed axis.
- - You can pass ``chunksize=an integer`` to ``append``, to change the writing chunksize (default is 50000). This will signficantly lower your memory usage on writing.
- - You can pass ``expectedrows=an integer`` to the first ``append``, to set the TOTAL number of expectedrows that ``PyTables`` will expected. This will optimize read/write performance.
- - Duplicate rows can be written to tables, but are filtered out in selection (with the last items being selected; thus a table is unique on major, minor pairs)
- - A ``PerformanceWarning`` will be raised if you are attempting to store types that will be pickled by PyTables (rather than stored as endemic types). See for more information and some solutions.
+ - ``Tables`` come with a writing performance penalty as compared to
+ regular stores. The benefit is the ability to append/delete and
+ query (potentially very large amounts of data). Write times are
+ generally longer as compared with regular stores. Query times can
+ be quite fast, especially on an indexed axis.
+ - You can pass ``chunksize=an integer`` to ``append``, to change the
+ writing chunksize (default is 50000). This will significantly lower
+ your memory usage on writing.
+ - You can pass ``expectedrows=an integer`` to the first ``append``,
+ to set the TOTAL number of rows that ``PyTables`` will
+ expect. This will optimize read/write performance (see the sketch
+ after this list).
+ - Duplicate rows can be written to tables, but are filtered out in
+ selection (with the last items being selected; thus a table is
+ unique on major, minor pairs)
+ - A ``PerformanceWarning`` will be raised if you are attempting to
+ store types that will be pickled by PyTables (rather than stored as
+ endemic types). See
+
+ for more information and some solutions.
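+
+A minimal sketch of the ``chunksize`` and ``expectedrows`` keywords (the
+file name and sizes here are hypothetical)::
+
+    store = HDFStore('perf.h5', mode='w')
+
+    # write in smaller chunks to lower memory usage on write
+    store.append('df', df, chunksize=100000)
+
+    # hint the TOTAL number of rows up front to optimize read/write
+    store.append('df2', df, expectedrows=1000000)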
Experimental
~~~~~~~~~~~~
@@ -1434,11 +1627,16 @@ HDFStore supports ``Panel4D`` storage.
store.append('p4d', p4d)
store
-These, by default, index the three axes ``items, major_axis, minor_axis``. On an ``AppendableTable`` it is possible to setup with the first append a different indexing scheme, depending on how you want to store your data. Pass the ``axes`` keyword with a list of dimension (currently must by exactly 1 less than the total dimensions of the object). This cannot be changed after table creation.
+These, by default, index the three axes ``items, major_axis,
+minor_axis``. On an ``AppendableTable`` it is possible to set up with
+the first append a different indexing scheme, depending on how you want
+to store your data. Pass the ``axes`` keyword with a list of dimensions
+(currently this must be exactly 1 less than the total dimensions of the
+object). This cannot be changed after table creation.
.. ipython:: python
- store.append('p4d2', p4d, axes = ['labels','major_axis','minor_axis'])
+ store.append('p4d2', p4d, axes=['labels', 'major_axis', 'minor_axis'])
store
store.select('p4d2', [ Term('labels=l1'), Term('items=Item1'), Term('minor_axis=A_big_strings') ])
diff --git a/doc/source/v0.10.2.txt b/doc/source/v0.10.2.txt
new file mode 100644
index 0000000000000..e9fed5b36f3cd
--- /dev/null
+++ b/doc/source/v0.10.2.txt
@@ -0,0 +1,18 @@
+.. _whatsnew_0102:
+
+v0.10.2 (February ??, 2013)
+---------------------------
+
+This is a minor release from 0.10.1 and includes many new features and
+enhancements along with a large number of bug fixes. There are also a number of
+important API changes that long-time pandas users should pay close attention
+to.
+
+**Enhancements**
+
+ - In ``HDFStore``, provide dotted attribute access to ``get`` from stores (e.g. ``store.df == store['df']``)
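+
+   For example (a minimal sketch, assuming ``store`` is an open
+   ``HDFStore`` containing an object stored under ``'df'``)::
+
+      store.df    # equivalent to store['df'] and store.get('df')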
+
+See the `full release notes
+`__ or issue tracker
+on GitHub for a complete list.
+
diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst
index 6c125c45a2599..646610ecccd88 100644
--- a/doc/source/whatsnew.rst
+++ b/doc/source/whatsnew.rst
@@ -16,6 +16,8 @@ What's New
These are new features and improvements of note in each release.
+.. include:: v0.10.2.txt
+
.. include:: v0.10.1.txt
.. include:: v0.10.0.txt
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 78bd204f26993..1a00ff522ccda 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -197,19 +197,19 @@ def __init__(self, path, mode='a', complevel=None, complib=None,
except ImportError: # pragma: no cover
raise Exception('HDFStore requires PyTables')
- self.path = path
- self.mode = mode
- self.handle = None
- self.complevel = complevel
- self.complib = complib
- self.fletcher32 = fletcher32
- self.filters = None
+ self._path = path
+ self._mode = mode
+ self._handle = None
+ self._complevel = complevel
+ self._complib = complib
+ self._fletcher32 = fletcher32
+ self._filters = None
self.open(mode=mode, warn=False)
@property
def root(self):
""" return the root node """
- return self.handle.root
+ return self._handle.root
def __getitem__(self, key):
return self.get(key)
@@ -220,10 +220,19 @@ def __setitem__(self, key, value):
def __delitem__(self, key):
return self.remove(key)
+ def __getattr__(self, name):
+ """ allow attribute access to get stores """
+ try:
+ return self.get(name)
+ except:
+ pass
+ raise AttributeError("'%s' object has no attribute '%s'" %
+ (type(self).__name__, name))
+
def __contains__(self, key):
""" check for existance of this key
can match the exact pathname or the pathnm w/o the leading '/'
- """
+ """
node = self.get_node(key)
if node is not None:
name = node._v_pathname
@@ -234,7 +243,7 @@ def __len__(self):
return len(self.groups())
def __repr__(self):
- output = '%s\nFile path: %s\n' % (type(self), self.path)
+ output = '%s\nFile path: %s\n' % (type(self), self._path)
if len(self.keys()):
keys = []
@@ -277,7 +286,7 @@ def open(self, mode='a', warn=True):
mode : {'a', 'w', 'r', 'r+'}, default 'a'
See HDFStore docstring or tables.openFile for info about modes
"""
- self.mode = mode
+ self._mode = mode
if warn and mode == 'w': # pragma: no cover
while True:
response = raw_input("Re-opening as mode='w' will delete the "
@@ -286,22 +295,22 @@ def open(self, mode='a', warn=True):
break
elif response == 'n':
return
- if self.handle is not None and self.handle.isopen:
- self.handle.close()
+ if self._handle is not None and self._handle.isopen:
+ self._handle.close()
- if self.complib is not None:
- if self.complevel is None:
- self.complevel = 9
- self.filters = _tables().Filters(self.complevel,
- self.complib,
- fletcher32=self.fletcher32)
+ if self._complib is not None:
+ if self._complevel is None:
+ self._complevel = 9
+ self._filters = _tables().Filters(self._complevel,
+ self._complib,
+ fletcher32=self._fletcher32)
try:
- self.handle = h5_open(self.path, self.mode)
+ self._handle = h5_open(self._path, self._mode)
except IOError, e: # pragma: no cover
if 'can not be written' in str(e):
- print 'Opening %s in read-only mode' % self.path
- self.handle = h5_open(self.path, 'r')
+ print 'Opening %s in read-only mode' % self._path
+ self._handle = h5_open(self._path, 'r')
else:
raise
@@ -309,13 +318,13 @@ def close(self):
"""
Close the PyTables file handle
"""
- self.handle.close()
+ self._handle.close()
def flush(self):
"""
Force all buffered modifications to be written to disk
"""
- self.handle.flush()
+ self._handle.flush()
def get(self, key):
"""
@@ -617,14 +626,14 @@ def create_table_index(self, key, **kwargs):
def groups(self):
""" return a list of all the top-level nodes (that are not themselves a pandas storage object) """
_tables()
- return [ g for g in self.handle.walkNodes() if getattr(g._v_attrs,'pandas_type',None) or getattr(g,'table',None) or (isinstance(g,_table_mod.table.Table) and g._v_name != 'table') ]
+ return [ g for g in self._handle.walkNodes() if getattr(g._v_attrs,'pandas_type',None) or getattr(g,'table',None) or (isinstance(g,_table_mod.table.Table) and g._v_name != 'table') ]
def get_node(self, key):
""" return the node with the key or None if it does not exist """
try:
if not key.startswith('/'):
key = '/' + key
- return self.handle.getNode(self.root, key)
+ return self._handle.getNode(self.root, key)
except:
return None
@@ -751,7 +760,7 @@ def _write_to_group(self, key, value, index=True, table=False, append=False, com
# remove the node if we are not appending
if group is not None and not append:
- self.handle.removeNode(group, recursive=True)
+ self._handle.removeNode(group, recursive=True)
group = None
if group is None:
@@ -768,7 +777,7 @@ def _write_to_group(self, key, value, index=True, table=False, append=False, com
new_path += p
group = self.get_node(new_path)
if group is None:
- group = self.handle.createGroup(path, p)
+ group = self._handle.createGroup(path, p)
path = new_path
s = self._create_storer(group, value, table=table, append=append, **kwargs)
@@ -1304,28 +1313,28 @@ def pathname(self):
return self.group._v_pathname
@property
- def handle(self):
- return self.parent.handle
+ def _handle(self):
+ return self.parent._handle
@property
def _quiet(self):
return self.parent._quiet
@property
- def filters(self):
- return self.parent.filters
+ def _filters(self):
+ return self.parent._filters
@property
- def complevel(self):
- return self.parent.complevel
+ def _complevel(self):
+ return self.parent._complevel
@property
- def fletcher32(self):
- return self.parent.fletcher32
+ def _fletcher32(self):
+ return self.parent._fletcher32
@property
- def complib(self):
- return self.parent.complib
+ def _complib(self):
+ return self.parent._complib
@property
def attrs(self):
@@ -1380,7 +1389,7 @@ def write(self, **kwargs):
def delete(self, where = None, **kwargs):
""" support fully deleting the node in its entirety (only) - where specification must be None """
if where is None:
- self.handle.removeNode(self.group, recursive=True)
+ self._handle.removeNode(self.group, recursive=True)
return None
raise NotImplementedError("cannot delete on an abstract storer")
@@ -1583,7 +1592,7 @@ def read_index_node(self, node):
def write_array(self, key, value):
if key in self.group:
- self.handle.removeNode(self.group, key)
+ self._handle.removeNode(self.group, key)
# Transform needed to interface with pytables row/col notation
empty_array = any(x == 0 for x in value.shape)
@@ -1593,7 +1602,7 @@ def write_array(self, key, value):
value = value.T
transposed = True
- if self.filters is not None:
+ if self._filters is not None:
atom = None
try:
# get the atom for this datatype
@@ -1603,9 +1612,9 @@ def write_array(self, key, value):
if atom is not None:
# create an empty chunked array and fill it from value
- ca = self.handle.createCArray(self.group, key, atom,
+ ca = self._handle.createCArray(self.group, key, atom,
value.shape,
- filters=self.filters)
+ filters=self._filters)
ca[:] = value
getattr(self.group, key)._v_attrs.transposed = transposed
return
@@ -1622,21 +1631,21 @@ def write_array(self, key, value):
ws = performance_doc % (inferred_type,key)
warnings.warn(ws, PerformanceWarning)
- vlarr = self.handle.createVLArray(self.group, key,
+ vlarr = self._handle.createVLArray(self.group, key,
_tables().ObjectAtom())
vlarr.append(value)
elif value.dtype.type == np.datetime64:
- self.handle.createArray(self.group, key, value.view('i8'))
+ self._handle.createArray(self.group, key, value.view('i8'))
getattr(self.group, key)._v_attrs.value_type = 'datetime64'
else:
if empty_array:
# ugly hack for length 0 axes
arr = np.empty((1,) * value.ndim)
- self.handle.createArray(self.group, key, arr)
+ self._handle.createArray(self.group, key, arr)
getattr(self.group, key)._v_attrs.value_type = str(value.dtype)
getattr(self.group, key)._v_attrs.shape = value.shape
else:
- self.handle.createArray(self.group, key, value)
+ self._handle.createArray(self.group, key, value)
getattr(self.group, key)._v_attrs.transposed = transposed
@@ -1729,7 +1738,7 @@ def write(self, obj, **kwargs):
for name, ss in obj.iteritems():
key = 'sparse_series_%s' % name
if key not in self.group._v_children:
- node = self.handle.createGroup(self.group, key)
+ node = self._handle.createGroup(self.group, key)
else:
node = getattr(self.group, key)
s = SparseSeriesStorer(self.parent, node)
@@ -1763,7 +1772,7 @@ def write(self, obj, **kwargs):
for name, sdf in obj.iteritems():
key = 'sparse_frame_%s' % name
if key not in self.group._v_children:
- node = self.handle.createGroup(self.group, key)
+ node = self._handle.createGroup(self.group, key)
else:
node = getattr(self.group, key)
s = SparseFrameStorer(self.parent, node)
@@ -2293,13 +2302,13 @@ def create_description(self, complib=None, complevel=None, fletcher32=False, exp
if complib:
if complevel is None:
- complevel = self.complevel or 9
+ complevel = self._complevel or 9
filters = _tables().Filters(complevel=complevel,
complib=complib,
- fletcher32=fletcher32 or self.fletcher32)
+ fletcher32=fletcher32 or self._fletcher32)
d['filters'] = filters
- elif self.filters is not None:
- d['filters'] = self.filters
+ elif self._filters is not None:
+ d['filters'] = self._filters
return d
@@ -2484,7 +2493,7 @@ def write(self, obj, axes=None, append=False, complib=None,
expectedrows=None, **kwargs):
if not append and self.is_exists:
- self.handle.removeNode(self.group, 'table')
+ self._handle.removeNode(self.group, 'table')
# create the axes
self.create_axes(axes=axes, obj=obj, validate=append,
@@ -2502,7 +2511,7 @@ def write(self, obj, axes=None, append=False, complib=None,
self.set_attrs()
# create the table
- table = self.handle.createTable(self.group, **options)
+ table = self._handle.createTable(self.group, **options)
else:
table = self.table
@@ -2552,6 +2561,11 @@ def write_data(self, chunksize):
def write_data_chunk(self, indexes, mask, search, values):
+ # if any of the value arrays are empty, there is nothing to write
+ for v in values:
+ if not np.prod(v.shape):
+ return
+
# get our function
try:
func = getattr(lib, "create_hdf_rows_%sd" % self.ndim)
@@ -2574,7 +2588,7 @@ def delete(self, where=None, **kwargs):
# delete all rows (and return the nrows)
if where is None or not len(where):
nrows = self.nrows
- self.handle.removeNode(self.group, recursive=True)
+ self._handle.removeNode(self.group, recursive=True)
return nrows
# infer the data kind
@@ -2894,6 +2908,7 @@ class Term(object):
_ops = ['<=', '<', '>=', '>', '!=', '==', '=']
_search = re.compile("^\s*(?P\w+)\s*(?P%s)\s*(?P.+)\s*$" % '|'.join(_ops))
+ _max_selectors = 31
def __init__(self, field, op=None, value=None, queryables=None):
self.field = None
@@ -3006,7 +3021,7 @@ def eval(self):
if self.is_in_table:
# too many values to create the expression?
- if len(values) <= 61:
+ if len(values) <= self._max_selectors:
self.condition = "(%s)" % ' | '.join(
["(%s == %s)" % (self.field, v[0]) for v in values])
@@ -3138,3 +3153,15 @@ def select_coords(self):
return self.table.table.getWhereList(self.condition, start=self.start, stop=self.stop, sort=True)
+### utilities ###
+
+def timeit(key, df, fn=None, remove=True, **kwargs):
+ """ write df to a fresh store under key (kwargs are passed through
+ to append); intended as a helper for timing appends, e.g. with
+ %timeit """
+ if fn is None:
+ fn = 'timeit.h5'
+ store = HDFStore(fn, mode='w')
+ store.append(key, df, **kwargs)
+ store.close()
+
+ if remove:
+ import os
+ os.remove(fn)
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index 934e088ddc1d3..a4df428d60d90 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -27,236 +27,293 @@
_multiprocess_can_split_ = False
+# helpers (and a contextmanager) to ensure file cleanup after each test
+def safe_remove(path):
+ if path is not None:
+ import os
+ try:
+ os.remove(path)
+ except:
+ pass
+
+def safe_close(store):
+ try:
+ if store is not None:
+ store.close()
+ except:
+ pass
+
+from contextlib import contextmanager
+
+@contextmanager
+def ensure_clean(path, mode='a', complevel=None, complib=None,
+ fletcher32=False):
+ store = HDFStore(path, mode=mode, complevel=complevel,
+ complib=complib, fletcher32=fletcher32)
+ try:
+ yield store
+ finally:
+ safe_close(store)
+ if mode == 'w' or mode == 'a':
+ safe_remove(path)
+
+# set these parameters so we don't have file sharing
+tables.parameters.MAX_NUMEXPR_THREADS = 1
+tables.parameters.MAX_BLOSC_THREADS = 1
+tables.parameters.MAX_THREADS = 1
+
class TestHDFStore(unittest.TestCase):
- scratchpath = '__scratch__.h5'
def setUp(self):
warnings.filterwarnings(action='ignore', category=FutureWarning)
self.path = '__%s__.h5' % tm.rands(10)
- self.store = HDFStore(self.path)
def tearDown(self):
- self.store.close()
- try:
- os.remove(self.path)
- except os.error:
- pass
+ pass
def test_factory_fun(self):
try:
- with get_store(self.scratchpath) as tbl:
+ with get_store(self.path) as tbl:
raise ValueError('blah')
except ValueError:
pass
+ finally:
+ safe_remove(self.path)
- with get_store(self.scratchpath) as tbl:
- tbl['a'] = tm.makeDataFrame()
-
- with get_store(self.scratchpath) as tbl:
- self.assertEquals(len(tbl), 1)
- self.assertEquals(type(tbl['a']), DataFrame)
-
- os.remove(self.scratchpath)
+ try:
+ with get_store(self.path) as tbl:
+ tbl['a'] = tm.makeDataFrame()
+
+ with get_store(self.path) as tbl:
+ self.assertEquals(len(tbl), 1)
+ self.assertEquals(type(tbl['a']), DataFrame)
+ finally:
+ safe_remove(self.path)
def test_keys(self):
- self.store['a'] = tm.makeTimeSeries()
- self.store['b'] = tm.makeStringSeries()
- self.store['c'] = tm.makeDataFrame()
- self.store['d'] = tm.makePanel()
- self.store['foo/bar'] = tm.makePanel()
- self.assertEquals(len(self.store), 5)
- self.assert_(set(
- self.store.keys()) == set(['/a', '/b', '/c', '/d', '/foo/bar']))
+
+ with ensure_clean(self.path) as store:
+ store['a'] = tm.makeTimeSeries()
+ store['b'] = tm.makeStringSeries()
+ store['c'] = tm.makeDataFrame()
+ store['d'] = tm.makePanel()
+ store['foo/bar'] = tm.makePanel()
+ self.assertEquals(len(store), 5)
+ self.assert_(set(
+ store.keys()) == set(['/a', '/b', '/c', '/d', '/foo/bar']))
def test_repr(self):
- repr(self.store)
- self.store['a'] = tm.makeTimeSeries()
- self.store['b'] = tm.makeStringSeries()
- self.store['c'] = tm.makeDataFrame()
- self.store['d'] = tm.makePanel()
- self.store['foo/bar'] = tm.makePanel()
- self.store.append('e', tm.makePanel())
- df = tm.makeDataFrame()
- df['obj1'] = 'foo'
- df['obj2'] = 'bar'
- df['bool1'] = df['A'] > 0
- df['bool2'] = df['B'] > 0
- df['bool3'] = True
- df['int1'] = 1
- df['int2'] = 2
- df['timestamp1'] = Timestamp('20010102')
- df['timestamp2'] = Timestamp('20010103')
- df['datetime1'] = datetime.datetime(2001,1,2,0,0)
- df['datetime2'] = datetime.datetime(2001,1,3,0,0)
- df.ix[3:6,['obj1']] = np.nan
- df = df.consolidate().convert_objects()
- self.store['df'] = df
+ with ensure_clean(self.path) as store:
+ repr(store)
+ store['a'] = tm.makeTimeSeries()
+ store['b'] = tm.makeStringSeries()
+ store['c'] = tm.makeDataFrame()
+ store['d'] = tm.makePanel()
+ store['foo/bar'] = tm.makePanel()
+ store.append('e', tm.makePanel())
- # make a random group in hdf space
- self.store.handle.createGroup(self.store.handle.root,'bah')
+ df = tm.makeDataFrame()
+ df['obj1'] = 'foo'
+ df['obj2'] = 'bar'
+ df['bool1'] = df['A'] > 0
+ df['bool2'] = df['B'] > 0
+ df['bool3'] = True
+ df['int1'] = 1
+ df['int2'] = 2
+ df['timestamp1'] = Timestamp('20010102')
+ df['timestamp2'] = Timestamp('20010103')
+ df['datetime1'] = datetime.datetime(2001,1,2,0,0)
+ df['datetime2'] = datetime.datetime(2001,1,3,0,0)
+ df.ix[3:6,['obj1']] = np.nan
+ df = df.consolidate().convert_objects()
+ store['df'] = df
+
+ # make a random group in hdf space
+ store._handle.createGroup(store._handle.root,'bah')
- repr(self.store)
- str(self.store)
+ repr(store)
+ str(store)
def test_contains(self):
- self.store['a'] = tm.makeTimeSeries()
- self.store['b'] = tm.makeDataFrame()
- self.store['foo/bar'] = tm.makeDataFrame()
- self.assert_('a' in self.store)
- self.assert_('b' in self.store)
- self.assert_('c' not in self.store)
- self.assert_('foo/bar' in self.store)
- self.assert_('/foo/bar' in self.store)
- self.assert_('/foo/b' not in self.store)
- self.assert_('bar' not in self.store)
-
- # GH 2694
- warnings.filterwarnings('ignore', category=tables.NaturalNameWarning)
- self.store['node())'] = tm.makeDataFrame()
- self.assert_('node())' in self.store)
- warnings.filterwarnings('always', category=tables.NaturalNameWarning)
+
+ with ensure_clean(self.path) as store:
+ store['a'] = tm.makeTimeSeries()
+ store['b'] = tm.makeDataFrame()
+ store['foo/bar'] = tm.makeDataFrame()
+ self.assert_('a' in store)
+ self.assert_('b' in store)
+ self.assert_('c' not in store)
+ self.assert_('foo/bar' in store)
+ self.assert_('/foo/bar' in store)
+ self.assert_('/foo/b' not in store)
+ self.assert_('bar' not in store)
+
+ # GH 2694
+ warnings.filterwarnings('ignore', category=tables.NaturalNameWarning)
+ store['node())'] = tm.makeDataFrame()
+ self.assert_('node())' in store)
+ warnings.filterwarnings('always', category=tables.NaturalNameWarning)
def test_versioning(self):
- self.store['a'] = tm.makeTimeSeries()
- self.store['b'] = tm.makeDataFrame()
- df = tm.makeTimeDataFrame()
- self.store.remove('df1')
- self.store.append('df1', df[:10])
- self.store.append('df1', df[10:])
- self.assert_(self.store.root.a._v_attrs.pandas_version == '0.10.1')
- self.assert_(self.store.root.b._v_attrs.pandas_version == '0.10.1')
- self.assert_(self.store.root.df1._v_attrs.pandas_version == '0.10.1')
-
- # write a file and wipe its versioning
- self.store.remove('df2')
- self.store.append('df2', df)
-
- # this is an error because its table_type is appendable, but no version
- # info
- self.store.get_node('df2')._v_attrs.pandas_version = None
- self.assertRaises(Exception, self.store.select, 'df2')
-
- def test_meta(self):
- raise nose.SkipTest('no meta')
-
- meta = {'foo': ['I love pandas ']}
- s = tm.makeTimeSeries()
- s.meta = meta
- self.store['a'] = s
- self.assert_(self.store['a'].meta == meta)
- df = tm.makeDataFrame()
- df.meta = meta
- self.store['b'] = df
- self.assert_(self.store['b'].meta == meta)
-
- # this should work, but because slicing doesn't propgate meta it doesn
- self.store.remove('df1')
- self.store.append('df1', df[:10])
- self.store.append('df1', df[10:])
- results = self.store['df1']
- # self.assert_(getattr(results,'meta',None) == meta)
-
- # no meta
- df = tm.makeDataFrame()
- self.store['b'] = df
- self.assert_(hasattr(self.store['b'], 'meta') is False)
+ with ensure_clean(self.path) as store:
+ store['a'] = tm.makeTimeSeries()
+ store['b'] = tm.makeDataFrame()
+ df = tm.makeTimeDataFrame()
+ store.remove('df1')
+ store.append('df1', df[:10])
+ store.append('df1', df[10:])
+ self.assert_(store.root.a._v_attrs.pandas_version == '0.10.1')
+ self.assert_(store.root.b._v_attrs.pandas_version == '0.10.1')
+ self.assert_(store.root.df1._v_attrs.pandas_version == '0.10.1')
+
+ # write a file and wipe its versioning
+ store.remove('df2')
+ store.append('df2', df)
+
+ # this is an error because its table_type is appendable, but no
+ # version info is present
+ store.get_node('df2')._v_attrs.pandas_version = None
+ self.assertRaises(Exception, store.select, 'df2')
def test_reopen_handle(self):
- self.store['a'] = tm.makeTimeSeries()
- self.store.open('w', warn=False)
- self.assert_(self.store.handle.isopen)
- self.assertEquals(len(self.store), 0)
+ with ensure_clean(self.path) as store:
+ store['a'] = tm.makeTimeSeries()
+ store.open('w', warn=False)
+ self.assert_(store._handle.isopen)
+ self.assertEquals(len(store), 0)
+
def test_flush(self):
- self.store['a'] = tm.makeTimeSeries()
- self.store.flush()
+
+ with ensure_clean(self.path) as store:
+ store['a'] = tm.makeTimeSeries()
+ store.flush()
def test_get(self):
- self.store['a'] = tm.makeTimeSeries()
- left = self.store.get('a')
- right = self.store['a']
- tm.assert_series_equal(left, right)
- left = self.store.get('/a')
- right = self.store['/a']
- tm.assert_series_equal(left, right)
+ with ensure_clean(self.path) as store:
+ store['a'] = tm.makeTimeSeries()
+ left = store.get('a')
+ right = store['a']
+ tm.assert_series_equal(left, right)
+
+ left = store.get('/a')
+ right = store['/a']
+ tm.assert_series_equal(left, right)
+
+ self.assertRaises(KeyError, store.get, 'b')
- self.assertRaises(KeyError, self.store.get, 'b')
+ def test_getattr(self):
- def test_put(self):
- ts = tm.makeTimeSeries()
- df = tm.makeTimeDataFrame()
- self.store['a'] = ts
- self.store['b'] = df[:10]
- self.store['foo/bar/bah'] = df[:10]
- self.store['foo'] = df[:10]
- self.store['/foo'] = df[:10]
- self.store.put('c', df[:10], table=True)
+ with ensure_clean(self.path) as store:
- # not OK, not a table
- self.assertRaises(
- ValueError, self.store.put, 'b', df[10:], append=True)
+ s = tm.makeTimeSeries()
+ store['a'] = s
- # node does not currently exist, test _is_table_type returns False in
- # this case
- #self.store.remove('f')
- #self.assertRaises(ValueError, self.store.put, 'f', df[10:], append=True)
+ # test attribute access
+ result = store.a
+ tm.assert_series_equal(result, s)
+ result = getattr(store, 'a')
+ tm.assert_series_equal(result, s)
- # can't put to a table (use append instead)
- self.assertRaises(ValueError, self.store.put, 'c', df[10:], append=True)
+ df = tm.makeTimeDataFrame()
+ store['df'] = df
+ result = store.df
+ tm.assert_frame_equal(result, df)
- # overwrite table
- self.store.put('c', df[:10], table=True, append=False)
- tm.assert_frame_equal(df[:10], self.store['c'])
+            # errors: names that do not resolve to stored objects raise AttributeError
+ self.assertRaises(AttributeError, getattr, store, 'd')
- def test_put_string_index(self):
+ for x in ['mode','path','handle','complib']:
+ self.assertRaises(AttributeError, getattr, store, x)
- index = Index(
- ["I am a very long string index: %s" % i for i in range(20)])
- s = Series(np.arange(20), index=index)
- df = DataFrame({'A': s, 'B': s})
+            # the private (underscored) attributes do exist, though they are not stores
+ for x in ['mode','path','handle','complib']:
+ getattr(store,"_%s" % x)
- self.store['a'] = s
- tm.assert_series_equal(self.store['a'], s)
+ def test_put(self):
- self.store['b'] = df
- tm.assert_frame_equal(self.store['b'], df)
+ with ensure_clean(self.path) as store:
+
+ ts = tm.makeTimeSeries()
+ df = tm.makeTimeDataFrame()
+ store['a'] = ts
+ store['b'] = df[:10]
+ store['foo/bar/bah'] = df[:10]
+ store['foo'] = df[:10]
+ store['/foo'] = df[:10]
+ store.put('c', df[:10], table=True)
+
+ # not OK, not a table
+ self.assertRaises(
+ ValueError, store.put, 'b', df[10:], append=True)
+
+ # node does not currently exist, test _is_table_type returns False in
+ # this case
+ # store.remove('f')
+ # self.assertRaises(ValueError, store.put, 'f', df[10:], append=True)
+
+ # can't put to a table (use append instead)
+ self.assertRaises(ValueError, store.put, 'c', df[10:], append=True)
+
+ # overwrite table
+ store.put('c', df[:10], table=True, append=False)
+ tm.assert_frame_equal(df[:10], store['c'])
- # mixed length
- index = Index(['abcdefghijklmnopqrstuvwxyz1234567890'] + ["I am a very long string index: %s" % i for i in range(20)])
- s = Series(np.arange(21), index=index)
- df = DataFrame({'A': s, 'B': s})
- self.store['a'] = s
- tm.assert_series_equal(self.store['a'], s)
+ def test_put_string_index(self):
- self.store['b'] = df
- tm.assert_frame_equal(self.store['b'], df)
+ with ensure_clean(self.path) as store:
+
+ index = Index(
+ ["I am a very long string index: %s" % i for i in range(20)])
+ s = Series(np.arange(20), index=index)
+ df = DataFrame({'A': s, 'B': s})
+
+ store['a'] = s
+ tm.assert_series_equal(store['a'], s)
+
+ store['b'] = df
+ tm.assert_frame_equal(store['b'], df)
+
+ # mixed length
+ index = Index(['abcdefghijklmnopqrstuvwxyz1234567890'] + ["I am a very long string index: %s" % i for i in range(20)])
+ s = Series(np.arange(21), index=index)
+ df = DataFrame({'A': s, 'B': s})
+ store['a'] = s
+ tm.assert_series_equal(store['a'], s)
+
+ store['b'] = df
+ tm.assert_frame_equal(store['b'], df)
def test_put_compression(self):
- df = tm.makeTimeDataFrame()
- self.store.put('c', df, table=True, complib='zlib')
- tm.assert_frame_equal(self.store['c'], df)
+ with ensure_clean(self.path) as store:
+ df = tm.makeTimeDataFrame()
- # can't compress if table=False
- self.assertRaises(ValueError, self.store.put, 'b', df,
- table=False, complib='zlib')
+ store.put('c', df, table=True, complib='zlib')
+ tm.assert_frame_equal(store['c'], df)
+
+ # can't compress if table=False
+ self.assertRaises(ValueError, store.put, 'b', df,
+ table=False, complib='zlib')
def test_put_compression_blosc(self):
tm.skip_if_no_package('tables', '2.2', app='blosc support')
df = tm.makeTimeDataFrame()
- # can't compress if table=False
- self.assertRaises(ValueError, self.store.put, 'b', df,
- table=False, complib='blosc')
-
- self.store.put('c', df, table=True, complib='blosc')
- tm.assert_frame_equal(self.store['c'], df)
+ with ensure_clean(self.path) as store:
+ # can't compress if table=False
+ self.assertRaises(ValueError, store.put, 'b', df,
+ table=False, complib='blosc')
+
+ store.put('c', df, table=True, complib='blosc')
+ tm.assert_frame_equal(store['c'], df)
+
def test_put_integer(self):
# non-date, non-string index
df = DataFrame(np.random.randn(50, 100))
@@ -277,432 +334,447 @@ def test_put_mixed_type(self):
df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0)
df.ix[3:6, ['obj1']] = np.nan
df = df.consolidate().convert_objects()
- self.store.remove('df')
- warnings.filterwarnings('ignore', category=PerformanceWarning)
- self.store.put('df',df)
- expected = self.store.get('df')
- tm.assert_frame_equal(expected,df)
- warnings.filterwarnings('always', category=PerformanceWarning)
+ with ensure_clean(self.path) as store:
+ store.remove('df')
+ warnings.filterwarnings('ignore', category=PerformanceWarning)
+ store.put('df',df)
+ expected = store.get('df')
+ tm.assert_frame_equal(expected,df)
+ warnings.filterwarnings('always', category=PerformanceWarning)
+
def test_append(self):
- df = tm.makeTimeDataFrame()
- self.store.remove('df1')
- self.store.append('df1', df[:10])
- self.store.append('df1', df[10:])
- tm.assert_frame_equal(self.store['df1'], df)
-
- self.store.remove('df2')
- self.store.put('df2', df[:10], table=True)
- self.store.append('df2', df[10:])
- tm.assert_frame_equal(self.store['df2'], df)
-
- self.store.remove('df3')
- self.store.append('/df3', df[:10])
- self.store.append('/df3', df[10:])
- tm.assert_frame_equal(self.store['df3'], df)
-
- # this is allowed by almost always don't want to do it
- warnings.filterwarnings('ignore', category=tables.NaturalNameWarning)
- self.store.remove('/df3 foo')
- self.store.append('/df3 foo', df[:10])
- self.store.append('/df3 foo', df[10:])
- tm.assert_frame_equal(self.store['df3 foo'], df)
- warnings.filterwarnings('always', category=tables.NaturalNameWarning)
-
- # panel
- wp = tm.makePanel()
- self.store.remove('wp1')
- self.store.append('wp1', wp.ix[:, :10, :])
- self.store.append('wp1', wp.ix[:, 10:, :])
- tm.assert_panel_equal(self.store['wp1'], wp)
-
- # ndim
- p4d = tm.makePanel4D()
- self.store.remove('p4d')
- self.store.append('p4d', p4d.ix[:, :, :10, :])
- self.store.append('p4d', p4d.ix[:, :, 10:, :])
- tm.assert_panel4d_equal(self.store['p4d'], p4d)
-
- # test using axis labels
- self.store.remove('p4d')
- self.store.append('p4d', p4d.ix[:, :, :10, :], axes=[
- 'items', 'major_axis', 'minor_axis'])
- self.store.append('p4d', p4d.ix[:, :, 10:, :], axes=[
- 'items', 'major_axis', 'minor_axis'])
- tm.assert_panel4d_equal(self.store['p4d'], p4d)
-
- # test using differnt number of items on each axis
- p4d2 = p4d.copy()
- p4d2['l4'] = p4d['l1']
- p4d2['l5'] = p4d['l1']
- self.store.remove('p4d2')
- self.store.append(
- 'p4d2', p4d2, axes=['items', 'major_axis', 'minor_axis'])
- tm.assert_panel4d_equal(self.store['p4d2'], p4d2)
-
- # test using differt order of items on the non-index axes
- self.store.remove('wp1')
- wp_append1 = wp.ix[:, :10, :]
- self.store.append('wp1', wp_append1)
- wp_append2 = wp.ix[:, 10:, :].reindex(items=wp.items[::-1])
- self.store.append('wp1', wp_append2)
- tm.assert_panel_equal(self.store['wp1'], wp)
-
- # dtype issues - mizxed type in a single object column
- df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]])
- df['mixed_column'] = 'testing'
- df.ix[2, 'mixed_column'] = np.nan
- self.store.remove('df')
- self.store.append('df', df)
- tm.assert_frame_equal(self.store['df'], df)
+ with ensure_clean(self.path) as store:
+ df = tm.makeTimeDataFrame()
+ store.remove('df1')
+ store.append('df1', df[:10])
+ store.append('df1', df[10:])
+ tm.assert_frame_equal(store['df1'], df)
+
+ store.remove('df2')
+ store.put('df2', df[:10], table=True)
+ store.append('df2', df[10:])
+ tm.assert_frame_equal(store['df2'], df)
+
+ store.remove('df3')
+ store.append('/df3', df[:10])
+ store.append('/df3', df[10:])
+ tm.assert_frame_equal(store['df3'], df)
+
+            # this is allowed, but you almost always don't want to do it
+ warnings.filterwarnings('ignore', category=tables.NaturalNameWarning)
+ store.remove('/df3 foo')
+ store.append('/df3 foo', df[:10])
+ store.append('/df3 foo', df[10:])
+ tm.assert_frame_equal(store['df3 foo'], df)
+ warnings.filterwarnings('always', category=tables.NaturalNameWarning)
+
+ # panel
+ wp = tm.makePanel()
+ store.remove('wp1')
+ store.append('wp1', wp.ix[:, :10, :])
+ store.append('wp1', wp.ix[:, 10:, :])
+ tm.assert_panel_equal(store['wp1'], wp)
+
+ # ndim
+ p4d = tm.makePanel4D()
+ store.remove('p4d')
+ store.append('p4d', p4d.ix[:, :, :10, :])
+ store.append('p4d', p4d.ix[:, :, 10:, :])
+ tm.assert_panel4d_equal(store['p4d'], p4d)
+
+ # test using axis labels
+ store.remove('p4d')
+ store.append('p4d', p4d.ix[:, :, :10, :], axes=[
+ 'items', 'major_axis', 'minor_axis'])
+ store.append('p4d', p4d.ix[:, :, 10:, :], axes=[
+ 'items', 'major_axis', 'minor_axis'])
+ tm.assert_panel4d_equal(store['p4d'], p4d)
+
+            # test using a different number of items on each axis
+ p4d2 = p4d.copy()
+ p4d2['l4'] = p4d['l1']
+ p4d2['l5'] = p4d['l1']
+ store.remove('p4d2')
+ store.append(
+ 'p4d2', p4d2, axes=['items', 'major_axis', 'minor_axis'])
+ tm.assert_panel4d_equal(store['p4d2'], p4d2)
+
+            # test using a different order of items on the non-index axes
+ store.remove('wp1')
+ wp_append1 = wp.ix[:, :10, :]
+ store.append('wp1', wp_append1)
+ wp_append2 = wp.ix[:, 10:, :].reindex(items=wp.items[::-1])
+ store.append('wp1', wp_append2)
+ tm.assert_panel_equal(store['wp1'], wp)
+
+            # dtype issues - mixed type in a single object column
+ df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]])
+ df['mixed_column'] = 'testing'
+ df.ix[2, 'mixed_column'] = np.nan
+ store.remove('df')
+ store.append('df', df)
+ tm.assert_frame_equal(store['df'], df)
def test_append_frame_column_oriented(self):
- # column oriented
- df = tm.makeTimeDataFrame()
- self.store.remove('df1')
- self.store.append('df1', df.ix[:, :2], axes=['columns'])
- self.store.append('df1', df.ix[:, 2:])
- tm.assert_frame_equal(self.store['df1'], df)
-
- result = self.store.select('df1', 'columns=A')
- expected = df.reindex(columns=['A'])
- tm.assert_frame_equal(expected, result)
-
- # this isn't supported
- self.assertRaises(Exception, self.store.select, 'df1', (
- 'columns=A', Term('index', '>', df.index[4])))
-
- # selection on the non-indexable
- result = self.store.select(
- 'df1', ('columns=A', Term('index', '=', df.index[0:4])))
- expected = df.reindex(columns=['A'], index=df.index[0:4])
- tm.assert_frame_equal(expected, result)
+ with ensure_clean(self.path) as store:
+ # column oriented
+ df = tm.makeTimeDataFrame()
+ store.remove('df1')
+ store.append('df1', df.ix[:, :2], axes=['columns'])
+ store.append('df1', df.ix[:, 2:])
+ tm.assert_frame_equal(store['df1'], df)
+
+ result = store.select('df1', 'columns=A')
+ expected = df.reindex(columns=['A'])
+ tm.assert_frame_equal(expected, result)
+
+ # this isn't supported
+ self.assertRaises(Exception, store.select, 'df1', (
+ 'columns=A', Term('index', '>', df.index[4])))
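+            # ('index' is a non-indexable on this table, and only '=' style Terms
+            # work on non-indexables, so the '>' comparison raises)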
+
+ # selection on the non-indexable
+ result = store.select(
+ 'df1', ('columns=A', Term('index', '=', df.index[0:4])))
+ expected = df.reindex(columns=['A'], index=df.index[0:4])
+ tm.assert_frame_equal(expected, result)
def test_ndim_indexables(self):
""" test using ndim tables in new ways"""
- p4d = tm.makePanel4D()
-
- def check_indexers(key, indexers):
- for i, idx in enumerate(indexers):
- self.assert_(getattr(getattr(
- self.store.root, key).table.description, idx)._v_pos == i)
-
- # append then change (will take existing schema)
- indexers = ['items', 'major_axis', 'minor_axis']
-
- self.store.remove('p4d')
- self.store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers)
- self.store.append('p4d', p4d.ix[:, :, 10:, :])
- tm.assert_panel4d_equal(self.store.select('p4d'), p4d)
- check_indexers('p4d', indexers)
-
- # same as above, but try to append with differnt axes
- self.store.remove('p4d')
- self.store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers)
- self.store.append('p4d', p4d.ix[:, :, 10:, :], axes=[
- 'labels', 'items', 'major_axis'])
- tm.assert_panel4d_equal(self.store.select('p4d'), p4d)
- check_indexers('p4d', indexers)
-
- # pass incorrect number of axes
- self.store.remove('p4d')
- self.assertRaises(Exception, self.store.append, 'p4d', p4d.ix[
- :, :, :10, :], axes=['major_axis', 'minor_axis'])
-
- # different than default indexables #1
- indexers = ['labels', 'major_axis', 'minor_axis']
- self.store.remove('p4d')
- self.store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers)
- self.store.append('p4d', p4d.ix[:, :, 10:, :])
- tm.assert_panel4d_equal(self.store['p4d'], p4d)
- check_indexers('p4d', indexers)
-
- # different than default indexables #2
- indexers = ['major_axis', 'labels', 'minor_axis']
- self.store.remove('p4d')
- self.store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers)
- self.store.append('p4d', p4d.ix[:, :, 10:, :])
- tm.assert_panel4d_equal(self.store['p4d'], p4d)
- check_indexers('p4d', indexers)
-
- # partial selection
- result = self.store.select('p4d', ['labels=l1'])
- expected = p4d.reindex(labels=['l1'])
- tm.assert_panel4d_equal(result, expected)
-
- # partial selection2
- result = self.store.select('p4d', [Term(
- 'labels=l1'), Term('items=ItemA'), Term('minor_axis=B')])
- expected = p4d.reindex(
- labels=['l1'], items=['ItemA'], minor_axis=['B'])
- tm.assert_panel4d_equal(result, expected)
-
- # non-existant partial selection
- result = self.store.select('p4d', [Term(
- 'labels=l1'), Term('items=Item1'), Term('minor_axis=B')])
- expected = p4d.reindex(labels=['l1'], items=[], minor_axis=['B'])
- tm.assert_panel4d_equal(result, expected)
+ with ensure_clean(self.path) as store:
+
+ p4d = tm.makePanel4D()
+
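+            # helper: each named indexer should occupy the matching column
+            # position in the underlying table description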
+ def check_indexers(key, indexers):
+ for i, idx in enumerate(indexers):
+ self.assert_(getattr(getattr(
+ store.root, key).table.description, idx)._v_pos == i)
+
+ # append then change (will take existing schema)
+ indexers = ['items', 'major_axis', 'minor_axis']
+
+ store.remove('p4d')
+ store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers)
+ store.append('p4d', p4d.ix[:, :, 10:, :])
+ tm.assert_panel4d_equal(store.select('p4d'), p4d)
+ check_indexers('p4d', indexers)
+
+            # same as above, but try to append with different axes
+ store.remove('p4d')
+ store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers)
+ store.append('p4d', p4d.ix[:, :, 10:, :], axes=[
+ 'labels', 'items', 'major_axis'])
+ tm.assert_panel4d_equal(store.select('p4d'), p4d)
+ check_indexers('p4d', indexers)
+
+ # pass incorrect number of axes
+ store.remove('p4d')
+ self.assertRaises(Exception, store.append, 'p4d', p4d.ix[
+ :, :, :10, :], axes=['major_axis', 'minor_axis'])
+
+ # different than default indexables #1
+ indexers = ['labels', 'major_axis', 'minor_axis']
+ store.remove('p4d')
+ store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers)
+ store.append('p4d', p4d.ix[:, :, 10:, :])
+ tm.assert_panel4d_equal(store['p4d'], p4d)
+ check_indexers('p4d', indexers)
+
+ # different than default indexables #2
+ indexers = ['major_axis', 'labels', 'minor_axis']
+ store.remove('p4d')
+ store.append('p4d', p4d.ix[:, :, :10, :], axes=indexers)
+ store.append('p4d', p4d.ix[:, :, 10:, :])
+ tm.assert_panel4d_equal(store['p4d'], p4d)
+ check_indexers('p4d', indexers)
+
+ # partial selection
+ result = store.select('p4d', ['labels=l1'])
+ expected = p4d.reindex(labels=['l1'])
+ tm.assert_panel4d_equal(result, expected)
+
+ # partial selection2
+ result = store.select('p4d', [Term(
+ 'labels=l1'), Term('items=ItemA'), Term('minor_axis=B')])
+ expected = p4d.reindex(
+ labels=['l1'], items=['ItemA'], minor_axis=['B'])
+ tm.assert_panel4d_equal(result, expected)
+
+            # non-existent partial selection
+ result = store.select('p4d', [Term(
+ 'labels=l1'), Term('items=Item1'), Term('minor_axis=B')])
+ expected = p4d.reindex(labels=['l1'], items=[], minor_axis=['B'])
+ tm.assert_panel4d_equal(result, expected)
def test_append_with_strings(self):
- wp = tm.makePanel()
- wp2 = wp.rename_axis(
- dict([(x, "%s_extra" % x) for x in wp.minor_axis]), axis=2)
-
- def check_col(key,name,size):
- self.assert_(getattr(self.store.get_storer(key).table.description,name).itemsize == size)
-
- self.store.append('s1', wp, min_itemsize=20)
- self.store.append('s1', wp2)
- expected = concat([wp, wp2], axis=2)
- expected = expected.reindex(minor_axis=sorted(expected.minor_axis))
- tm.assert_panel_equal(self.store['s1'], expected)
- check_col('s1', 'minor_axis', 20)
-
- # test dict format
- self.store.append('s2', wp, min_itemsize={'minor_axis': 20})
- self.store.append('s2', wp2)
- expected = concat([wp, wp2], axis=2)
- expected = expected.reindex(minor_axis=sorted(expected.minor_axis))
- tm.assert_panel_equal(self.store['s2'], expected)
- check_col('s2', 'minor_axis', 20)
-
- # apply the wrong field (similar to #1)
- self.store.append('s3', wp, min_itemsize={'major_axis': 20})
- self.assertRaises(Exception, self.store.append, 's3')
-
- # test truncation of bigger strings
- self.store.append('s4', wp)
- self.assertRaises(Exception, self.store.append, 's4', wp2)
-
- # avoid truncation on elements
- df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']])
- self.store.append('df_big', df)
- tm.assert_frame_equal(self.store.select('df_big'), df)
- check_col('df_big', 'values_block_1', 15)
-
- # appending smaller string ok
- df2 = DataFrame([[124, 'asdqy'], [346, 'dggnhefbdfb']])
- self.store.append('df_big', df2)
- expected = concat([df, df2])
- tm.assert_frame_equal(self.store.select('df_big'), expected)
- check_col('df_big', 'values_block_1', 15)
-
- # avoid truncation on elements
- df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']])
- self.store.append('df_big2', df, min_itemsize={'values': 50})
- tm.assert_frame_equal(self.store.select('df_big2'), df)
- check_col('df_big2', 'values_block_1', 50)
-
- # bigger string on next append
- self.store.append('df_new', df)
- df_new = DataFrame(
- [[124, 'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']])
- self.assertRaises(Exception, self.store.append, 'df_new', df_new)
-
- # with nans
- self.store.remove('df')
- df = tm.makeTimeDataFrame()
- df['string'] = 'foo'
- df.ix[1:4, 'string'] = np.nan
- df['string2'] = 'bar'
- df.ix[4:8, 'string2'] = np.nan
- df['string3'] = 'bah'
- df.ix[1:, 'string3'] = np.nan
- self.store.append('df', df)
- result = self.store.select('df')
- tm.assert_frame_equal(result, df)
- def test_append_with_data_columns(self):
+ with ensure_clean(self.path) as store:
+ wp = tm.makePanel()
+ wp2 = wp.rename_axis(
+ dict([(x, "%s_extra" % x) for x in wp.minor_axis]), axis=2)
+
+ def check_col(key,name,size):
+ self.assert_(getattr(store.get_storer(key).table.description,name).itemsize == size)
+
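+            # min_itemsize reserves the string column width up front so that
+            # longer strings can still be appended later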
+ store.append('s1', wp, min_itemsize=20)
+ store.append('s1', wp2)
+ expected = concat([wp, wp2], axis=2)
+ expected = expected.reindex(minor_axis=sorted(expected.minor_axis))
+ tm.assert_panel_equal(store['s1'], expected)
+ check_col('s1', 'minor_axis', 20)
+
+ # test dict format
+ store.append('s2', wp, min_itemsize={'minor_axis': 20})
+ store.append('s2', wp2)
+ expected = concat([wp, wp2], axis=2)
+ expected = expected.reindex(minor_axis=sorted(expected.minor_axis))
+ tm.assert_panel_equal(store['s2'], expected)
+ check_col('s2', 'minor_axis', 20)
+
+ # apply the wrong field (similar to #1)
+ store.append('s3', wp, min_itemsize={'major_axis': 20})
+ self.assertRaises(Exception, store.append, 's3')
+
+ # test truncation of bigger strings
+ store.append('s4', wp)
+ self.assertRaises(Exception, store.append, 's4', wp2)
+
+ # avoid truncation on elements
+ df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']])
+ store.append('df_big', df)
+ tm.assert_frame_equal(store.select('df_big'), df)
+ check_col('df_big', 'values_block_1', 15)
+
+ # appending smaller string ok
+ df2 = DataFrame([[124, 'asdqy'], [346, 'dggnhefbdfb']])
+ store.append('df_big', df2)
+ expected = concat([df, df2])
+ tm.assert_frame_equal(store.select('df_big'), expected)
+ check_col('df_big', 'values_block_1', 15)
+
+ # avoid truncation on elements
+ df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']])
+ store.append('df_big2', df, min_itemsize={'values': 50})
+ tm.assert_frame_equal(store.select('df_big2'), df)
+ check_col('df_big2', 'values_block_1', 50)
+
+ # bigger string on next append
+ store.append('df_new', df)
+ df_new = DataFrame(
+ [[124, 'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']])
+ self.assertRaises(Exception, store.append, 'df_new', df_new)
+
+ # with nans
+ store.remove('df')
+ df = tm.makeTimeDataFrame()
+ df['string'] = 'foo'
+ df.ix[1:4, 'string'] = np.nan
+ df['string2'] = 'bar'
+ df.ix[4:8, 'string2'] = np.nan
+ df['string3'] = 'bah'
+ df.ix[1:, 'string3'] = np.nan
+ store.append('df', df)
+ result = store.select('df')
+ tm.assert_frame_equal(result, df)
- df = tm.makeTimeDataFrame()
- self.store.remove('df')
- self.store.append('df', df[:2], data_columns=['B'])
- self.store.append('df', df[2:])
- tm.assert_frame_equal(self.store['df'], df)
-
- # check that we have indicies created
- assert(self.store.handle.root.df.table.cols.index.is_indexed is True)
- assert(self.store.handle.root.df.table.cols.B.is_indexed is True)
-
- # data column searching
- result = self.store.select('df', [Term('B>0')])
- expected = df[df.B > 0]
- tm.assert_frame_equal(result, expected)
-
- # data column searching (with an indexable and a data_columns)
- result = self.store.select(
- 'df', [Term('B>0'), Term('index', '>', df.index[3])])
- df_new = df.reindex(index=df.index[4:])
- expected = df_new[df_new.B > 0]
- tm.assert_frame_equal(result, expected)
-
- # data column selection with a string data_column
- df_new = df.copy()
- df_new['string'] = 'foo'
- df_new['string'][1:4] = np.nan
- df_new['string'][5:6] = 'bar'
- self.store.remove('df')
- self.store.append('df', df_new, data_columns=['string'])
- result = self.store.select('df', [Term('string', '=', 'foo')])
- expected = df_new[df_new.string == 'foo']
- tm.assert_frame_equal(result, expected)
-
- # using min_itemsize and a data column
- def check_col(key,name,size):
- self.assert_(getattr(self.store.get_storer(key).table.description,name).itemsize == size)
-
- self.store.remove('df')
- self.store.append('df', df_new, data_columns=['string'],
- min_itemsize={'string': 30})
- check_col('df', 'string', 30)
- self.store.remove('df')
- self.store.append(
- 'df', df_new, data_columns=['string'], min_itemsize=30)
- check_col('df', 'string', 30)
- self.store.remove('df')
- self.store.append('df', df_new, data_columns=['string'],
- min_itemsize={'values': 30})
- check_col('df', 'string', 30)
-
- df_new['string2'] = 'foobarbah'
- df_new['string_block1'] = 'foobarbah1'
- df_new['string_block2'] = 'foobarbah2'
- self.store.remove('df')
- self.store.append('df', df_new, data_columns=['string', 'string2'], min_itemsize={'string': 30, 'string2': 40, 'values': 50})
- check_col('df', 'string', 30)
- check_col('df', 'string2', 40)
- check_col('df', 'values_block_1', 50)
-
- # multiple data columns
- df_new = df.copy()
- df_new['string'] = 'foo'
- df_new['string'][1:4] = np.nan
- df_new['string'][5:6] = 'bar'
- df_new['string2'] = 'foo'
- df_new['string2'][2:5] = np.nan
- df_new['string2'][7:8] = 'bar'
- self.store.remove('df')
- self.store.append(
- 'df', df_new, data_columns=['A', 'B', 'string', 'string2'])
- result = self.store.select('df', [Term('string', '=', 'foo'), Term(
- 'string2=foo'), Term('A>0'), Term('B<0')])
- expected = df_new[(df_new.string == 'foo') & (
- df_new.string2 == 'foo') & (df_new.A > 0) & (df_new.B < 0)]
- tm.assert_frame_equal(result, expected)
-
- # yield an empty frame
- result = self.store.select('df', [Term('string', '=', 'foo'), Term(
- 'string2=bar'), Term('A>0'), Term('B<0')])
- expected = df_new[(df_new.string == 'foo') & (
- df_new.string2 == 'bar') & (df_new.A > 0) & (df_new.B < 0)]
- tm.assert_frame_equal(result, expected)
-
- # doc example
- df_dc = df.copy()
- df_dc['string'] = 'foo'
- df_dc.ix[4:6, 'string'] = np.nan
- df_dc.ix[7:9, 'string'] = 'bar'
- df_dc['string2'] = 'cool'
- df_dc['datetime'] = Timestamp('20010102')
- df_dc = df_dc.convert_objects()
- df_dc.ix[3:5, ['A', 'B', 'datetime']] = np.nan
-
- self.store.remove('df_dc')
- self.store.append('df_dc', df_dc, data_columns=['B', 'C',
- 'string', 'string2', 'datetime'])
- result = self.store.select('df_dc', [Term('B>0')])
-
- expected = df_dc[df_dc.B > 0]
- tm.assert_frame_equal(result, expected)
-
- result = self.store.select(
- 'df_dc', ['B > 0', 'C > 0', 'string == foo'])
- expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (
- df_dc.string == 'foo')]
- tm.assert_frame_equal(result, expected)
+ def test_append_with_data_columns(self):
+ with ensure_clean(self.path) as store:
+ df = tm.makeTimeDataFrame()
+ store.remove('df')
+ store.append('df', df[:2], data_columns=['B'])
+ store.append('df', df[2:])
+ tm.assert_frame_equal(store['df'], df)
+
+            # check that we have indices created
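+            # (B was declared a data_column, so it gets its own on-disk column and index)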
+ assert(store._handle.root.df.table.cols.index.is_indexed is True)
+ assert(store._handle.root.df.table.cols.B.is_indexed is True)
+
+ # data column searching
+ result = store.select('df', [Term('B>0')])
+ expected = df[df.B > 0]
+ tm.assert_frame_equal(result, expected)
+
+ # data column searching (with an indexable and a data_columns)
+ result = store.select(
+ 'df', [Term('B>0'), Term('index', '>', df.index[3])])
+ df_new = df.reindex(index=df.index[4:])
+ expected = df_new[df_new.B > 0]
+ tm.assert_frame_equal(result, expected)
+
+ # data column selection with a string data_column
+ df_new = df.copy()
+ df_new['string'] = 'foo'
+ df_new['string'][1:4] = np.nan
+ df_new['string'][5:6] = 'bar'
+ store.remove('df')
+ store.append('df', df_new, data_columns=['string'])
+ result = store.select('df', [Term('string', '=', 'foo')])
+ expected = df_new[df_new.string == 'foo']
+ tm.assert_frame_equal(result, expected)
+
+ # using min_itemsize and a data column
+ def check_col(key,name,size):
+ self.assert_(getattr(store.get_storer(key).table.description,name).itemsize == size)
+
+ with ensure_clean(self.path) as store:
+ store.remove('df')
+ store.append('df', df_new, data_columns=['string'],
+ min_itemsize={'string': 30})
+ check_col('df', 'string', 30)
+ store.remove('df')
+ store.append(
+ 'df', df_new, data_columns=['string'], min_itemsize=30)
+ check_col('df', 'string', 30)
+ store.remove('df')
+ store.append('df', df_new, data_columns=['string'],
+ min_itemsize={'values': 30})
+ check_col('df', 'string', 30)
+
+ with ensure_clean(self.path) as store:
+ df_new['string2'] = 'foobarbah'
+ df_new['string_block1'] = 'foobarbah1'
+ df_new['string_block2'] = 'foobarbah2'
+ store.remove('df')
+ store.append('df', df_new, data_columns=['string', 'string2'], min_itemsize={'string': 30, 'string2': 40, 'values': 50})
+ check_col('df', 'string', 30)
+ check_col('df', 'string2', 40)
+ check_col('df', 'values_block_1', 50)
+
+ with ensure_clean(self.path) as store:
+ # multiple data columns
+ df_new = df.copy()
+ df_new['string'] = 'foo'
+ df_new['string'][1:4] = np.nan
+ df_new['string'][5:6] = 'bar'
+ df_new['string2'] = 'foo'
+ df_new['string2'][2:5] = np.nan
+ df_new['string2'][7:8] = 'bar'
+ store.remove('df')
+ store.append(
+ 'df', df_new, data_columns=['A', 'B', 'string', 'string2'])
+ result = store.select('df', [Term('string', '=', 'foo'), Term(
+ 'string2=foo'), Term('A>0'), Term('B<0')])
+ expected = df_new[(df_new.string == 'foo') & (
+ df_new.string2 == 'foo') & (df_new.A > 0) & (df_new.B < 0)]
+ tm.assert_frame_equal(result, expected)
+
+ # yield an empty frame
+ result = store.select('df', [Term('string', '=', 'foo'), Term(
+ 'string2=bar'), Term('A>0'), Term('B<0')])
+ expected = df_new[(df_new.string == 'foo') & (
+ df_new.string2 == 'bar') & (df_new.A > 0) & (df_new.B < 0)]
+ tm.assert_frame_equal(result, expected)
+
+ with ensure_clean(self.path) as store:
+ # doc example
+ df_dc = df.copy()
+ df_dc['string'] = 'foo'
+ df_dc.ix[4:6, 'string'] = np.nan
+ df_dc.ix[7:9, 'string'] = 'bar'
+ df_dc['string2'] = 'cool'
+ df_dc['datetime'] = Timestamp('20010102')
+ df_dc = df_dc.convert_objects()
+ df_dc.ix[3:5, ['A', 'B', 'datetime']] = np.nan
+
+ store.remove('df_dc')
+ store.append('df_dc', df_dc, data_columns=['B', 'C',
+ 'string', 'string2', 'datetime'])
+ result = store.select('df_dc', [Term('B>0')])
+
+ expected = df_dc[df_dc.B > 0]
+ tm.assert_frame_equal(result, expected)
+
+ result = store.select(
+ 'df_dc', ['B > 0', 'C > 0', 'string == foo'])
+ expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (
+ df_dc.string == 'foo')]
+ tm.assert_frame_equal(result, expected)
+
def test_create_table_index(self):
-
- def col(t,column):
- return getattr(self.store.get_storer(t).table.cols,column)
-
- # index=False
- wp = tm.makePanel()
- self.store.append('p5', wp, index=False)
- self.store.create_table_index('p5', columns=['major_axis'])
- assert(col('p5', 'major_axis').is_indexed is True)
- assert(col('p5', 'minor_axis').is_indexed is False)
-
- # index=True
- self.store.append('p5i', wp, index=True)
- assert(col('p5i', 'major_axis').is_indexed is True)
- assert(col('p5i', 'minor_axis').is_indexed is True)
-
- # default optlevels
- self.store.get_storer('p5').create_index()
- assert(col('p5', 'major_axis').index.optlevel == 6)
- assert(col('p5', 'minor_axis').index.kind == 'medium')
-
- # let's change the indexing scheme
- self.store.create_table_index('p5')
- assert(col('p5', 'major_axis').index.optlevel == 6)
- assert(col('p5', 'minor_axis').index.kind == 'medium')
- self.store.create_table_index('p5', optlevel=9)
- assert(col('p5', 'major_axis').index.optlevel == 9)
- assert(col('p5', 'minor_axis').index.kind == 'medium')
- self.store.create_table_index('p5', kind='full')
- assert(col('p5', 'major_axis').index.optlevel == 9)
- assert(col('p5', 'minor_axis').index.kind == 'full')
- self.store.create_table_index('p5', optlevel=1, kind='light')
- assert(col('p5', 'major_axis').index.optlevel == 1)
- assert(col('p5', 'minor_axis').index.kind == 'light')
-
- # data columns
- df = tm.makeTimeDataFrame()
- df['string'] = 'foo'
- df['string2'] = 'bar'
- self.store.append('f', df, data_columns=['string', 'string2'])
- assert(col('f', 'index').is_indexed is True)
- assert(col('f', 'string').is_indexed is True)
- assert(col('f', 'string2').is_indexed is True)
-
- # specify index=columns
- self.store.append(
- 'f2', df, index=['string'], data_columns=['string', 'string2'])
- assert(col('f2', 'index').is_indexed is False)
- assert(col('f2', 'string').is_indexed is True)
- assert(col('f2', 'string2').is_indexed is False)
-
- # try to index a non-table
- self.store.remove('f2')
- self.store.put('f2', df)
- self.assertRaises(Exception, self.store.create_table_index, 'f2')
-
- # try to change the version supports flag
- from pandas.io import pytables
- pytables._table_supports_index = False
- self.assertRaises(Exception, self.store.create_table_index, 'f')
-
- # test out some versions
- original = tables.__version__
-
- for v in ['2.2', '2.2b']:
- pytables._table_mod = None
- pytables._table_supports_index = False
- tables.__version__ = v
- self.assertRaises(Exception, self.store.create_table_index, 'f')
-
- for v in ['2.3.1', '2.3.1b', '2.4dev', '2.4', original]:
- pytables._table_mod = None
+
+ with ensure_clean(self.path) as store:
+
+ def col(t,column):
+ return getattr(store.get_storer(t).table.cols,column)
+
+ # index=False
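+            # (index=False defers index creation; create_table_index can then
+            # index just the requested columns)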
+ wp = tm.makePanel()
+ store.append('p5', wp, index=False)
+ store.create_table_index('p5', columns=['major_axis'])
+ assert(col('p5', 'major_axis').is_indexed is True)
+ assert(col('p5', 'minor_axis').is_indexed is False)
+
+ # index=True
+ store.append('p5i', wp, index=True)
+ assert(col('p5i', 'major_axis').is_indexed is True)
+ assert(col('p5i', 'minor_axis').is_indexed is True)
+
+ # default optlevels
+ store.get_storer('p5').create_index()
+ assert(col('p5', 'major_axis').index.optlevel == 6)
+ assert(col('p5', 'minor_axis').index.kind == 'medium')
+
+ # let's change the indexing scheme
+ store.create_table_index('p5')
+ assert(col('p5', 'major_axis').index.optlevel == 6)
+ assert(col('p5', 'minor_axis').index.kind == 'medium')
+ store.create_table_index('p5', optlevel=9)
+ assert(col('p5', 'major_axis').index.optlevel == 9)
+ assert(col('p5', 'minor_axis').index.kind == 'medium')
+ store.create_table_index('p5', kind='full')
+ assert(col('p5', 'major_axis').index.optlevel == 9)
+ assert(col('p5', 'minor_axis').index.kind == 'full')
+ store.create_table_index('p5', optlevel=1, kind='light')
+ assert(col('p5', 'major_axis').index.optlevel == 1)
+ assert(col('p5', 'minor_axis').index.kind == 'light')
+
+ # data columns
+ df = tm.makeTimeDataFrame()
+ df['string'] = 'foo'
+ df['string2'] = 'bar'
+ store.append('f', df, data_columns=['string', 'string2'])
+ assert(col('f', 'index').is_indexed is True)
+ assert(col('f', 'string').is_indexed is True)
+ assert(col('f', 'string2').is_indexed is True)
+
+ # specify index=columns
+ store.append(
+ 'f2', df, index=['string'], data_columns=['string', 'string2'])
+ assert(col('f2', 'index').is_indexed is False)
+ assert(col('f2', 'string').is_indexed is True)
+ assert(col('f2', 'string2').is_indexed is False)
+
+ # try to index a non-table
+ store.remove('f2')
+ store.put('f2', df)
+ self.assertRaises(Exception, store.create_table_index, 'f2')
+
+ # try to change the version supports flag
+ from pandas.io import pytables
pytables._table_supports_index = False
- tables.__version__ = v
- self.store.create_table_index('f')
- pytables._table_mod = None
- pytables._table_supports_index = False
- tables.__version__ = original
+ self.assertRaises(Exception, store.create_table_index, 'f')
+
+ # test out some versions
+ original = tables.__version__
+
+ for v in ['2.2', '2.2b']:
+ pytables._table_mod = None
+ pytables._table_supports_index = False
+ tables.__version__ = v
+ self.assertRaises(Exception, store.create_table_index, 'f')
+
+ for v in ['2.3.1', '2.3.1b', '2.4dev', '2.4', original]:
+ pytables._table_mod = None
+ pytables._table_supports_index = False
+ tables.__version__ = v
+ store.create_table_index('f')
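+            # restore the real PyTables state for subsequent tests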
+ pytables._table_mod = None
+ pytables._table_supports_index = False
+ tables.__version__ = original
def test_big_table_frame(self):
raise nose.SkipTest('no big table frame')
@@ -715,14 +787,10 @@ def test_big_table_frame(self):
import time
x = time.time()
- try:
- store = HDFStore(self.scratchpath)
+ with ensure_clean(self.path,mode='w') as store:
store.append('df', df)
rows = store.root.df.table.nrows
recons = store.select('df')
- finally:
- store.close()
- os.remove(self.scratchpath)
print "\nbig_table frame [%s] -> %5.2f" % (rows, time.time() - x)
@@ -743,25 +811,18 @@ def test_big_table2_frame(self):
df['datetime%03d' % x] = datetime.datetime(2001, 1, 2, 0, 0)
print "\nbig_table2 frame (creation of df) [rows->%s] -> %5.2f" % (len(df.index), time.time() - start_time)
- fn = 'big_table2.h5'
-
- try:
- def f(chunksize):
- store = HDFStore(fn, mode='w')
+ def f(chunksize):
+ with ensure_clean(self.path,mode='w') as store:
store.append('df', df, chunksize=chunksize)
r = store.root.df.table.nrows
- store.close()
return r
- for c in [10000, 50000, 250000]:
- start_time = time.time()
- print "big_table2 frame [chunk->%s]" % c
- rows = f(c)
- print "big_table2 frame [rows->%s,chunk->%s] -> %5.2f" % (rows, c, time.time() - start_time)
-
- finally:
- os.remove(fn)
+ for c in [10000, 50000, 250000]:
+ start_time = time.time()
+ print "big_table2 frame [chunk->%s]" % c
+ rows = f(c)
+ print "big_table2 frame [rows->%s,chunk->%s] -> %5.2f" % (rows, c, time.time() - start_time)
def test_big_put_frame(self):
raise nose.SkipTest('no big put frame')
@@ -777,21 +838,15 @@ def test_big_put_frame(self):
df['datetime%03d' % x] = datetime.datetime(2001, 1, 2, 0, 0)
print "\nbig_put frame (creation of df) [rows->%s] -> %5.2f" % (len(df.index), time.time() - start_time)
- fn = 'big_put.h5'
-
- try:
+ with ensure_clean(self.path, mode='w') as store:
start_time = time.time()
-            store = HDFStore(fn, mode='w')
store.put('df', df)
- store.close()
print df.get_dtype_counts()
print "big_put frame [shape->%s] -> %5.2f" % (df.shape, time.time() - start_time)
- finally:
- os.remove(fn)
-
def test_big_table_panel(self):
raise nose.SkipTest('no big table panel')
@@ -807,27 +862,25 @@ def test_big_table_panel(self):
import time
x = time.time()
- try:
- store = HDFStore(self.scratchpath)
+
+ with ensure_clean(self.path, mode='w') as store:
store.append('wp', wp)
rows = store.root.wp.table.nrows
recons = store.select('wp')
- finally:
- store.close()
- os.remove(self.scratchpath)
print "\nbig_table panel [%s] -> %5.2f" % (rows, time.time() - x)
def test_append_diff_item_order(self):
- raise nose.SkipTest('append diff item order')
wp = tm.makePanel()
wp1 = wp.ix[:, :10, :]
wp2 = wp.ix[['ItemC', 'ItemB', 'ItemA'], 10:, :]
-
- self.store.put('panel', wp1, table=True)
- self.assertRaises(Exception, self.store.put, 'panel', wp2,
- append=True)
+
+ with ensure_clean(self.path) as store:
+ store.put('panel', wp1, table=True)
+ self.assertRaises(Exception, store.put, 'panel', wp2,
+ append=True)
def test_append_hierarchical(self):
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
@@ -838,75 +891,81 @@ def test_append_hierarchical(self):
df = DataFrame(np.random.randn(10, 3), index=index,
columns=['A', 'B', 'C'])
- self.store.append('mi', df)
- result = self.store.select('mi')
- tm.assert_frame_equal(result, df)
+ with ensure_clean(self.path) as store:
+ store.append('mi', df)
+ result = store.select('mi')
+ tm.assert_frame_equal(result, df)
def test_append_misc(self):
- # unsuported data types for non-tables
- p4d = tm.makePanel4D()
- self.assertRaises(Exception, self.store.put,'p4d',p4d)
+ with ensure_clean(self.path) as store:
- # unsupported data type for table
- s = tm.makeStringSeries()
- self.assertRaises(Exception, self.store.append,'s',s)
+            # unsupported data types for non-tables
+ p4d = tm.makePanel4D()
+ self.assertRaises(Exception, store.put,'p4d',p4d)
- # unsuported data types
- self.assertRaises(Exception, self.store.put,'abc',None)
- self.assertRaises(Exception, self.store.put,'abc','123')
- self.assertRaises(Exception, self.store.put,'abc',123)
- self.assertRaises(Exception, self.store.put,'abc',np.arange(5))
+ # unsupported data type for table
+ s = tm.makeStringSeries()
+ self.assertRaises(Exception, store.append,'s',s)
- df = tm.makeDataFrame()
- self.store.append('df', df, chunksize=1)
- result = self.store.select('df')
- tm.assert_frame_equal(result, df)
+            # unsupported data types
+ self.assertRaises(Exception, store.put,'abc',None)
+ self.assertRaises(Exception, store.put,'abc','123')
+ self.assertRaises(Exception, store.put,'abc',123)
+ self.assertRaises(Exception, store.put,'abc',np.arange(5))
- self.store.append('df1', df, expectedrows=10)
- result = self.store.select('df1')
- tm.assert_frame_equal(result, df)
+ df = tm.makeDataFrame()
+ store.append('df', df, chunksize=1)
+ result = store.select('df')
+ tm.assert_frame_equal(result, df)
+
+ store.append('df1', df, expectedrows=10)
+ result = store.select('df1')
+ tm.assert_frame_equal(result, df)
def test_table_index_incompatible_dtypes(self):
df1 = DataFrame({'a': [1, 2, 3]})
df2 = DataFrame({'a': [4, 5, 6]},
index=date_range('1/1/2000', periods=3))
- self.store.put('frame', df1, table=True)
- self.assertRaises(Exception, self.store.put, 'frame', df2,
- table=True, append=True)
+ with ensure_clean(self.path) as store:
+ store.put('frame', df1, table=True)
+ self.assertRaises(Exception, store.put, 'frame', df2,
+ table=True, append=True)
def test_table_values_dtypes_roundtrip(self):
- df1 = DataFrame({'a': [1, 2, 3]}, dtype='f8')
- self.store.append('df_f8', df1)
- assert df1.dtypes == self.store['df_f8'].dtypes
-
- df2 = DataFrame({'a': [1, 2, 3]}, dtype='i8')
- self.store.append('df_i8', df2)
- assert df2.dtypes == self.store['df_i8'].dtypes
-
- # incompatible dtype
- self.assertRaises(Exception, self.store.append, 'df_i8', df1)
-
- # check creation/storage/retrieval of float32 (a bit hacky to actually create them thought)
- df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['A'])
- self.store.append('df_f4', df1)
- assert df1.dtypes == self.store['df_f4'].dtypes
- assert df1.dtypes[0] == 'float32'
-
- # check with mixed dtypes (but not multi float types)
- df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['float32'])
- df1['string'] = 'foo'
- self.store.append('df_mixed_dtypes1', df1)
- assert (df1.dtypes == self.store['df_mixed_dtypes1'].dtypes).all() == True
- assert df1.dtypes[0] == 'float32'
- assert df1.dtypes[1] == 'object'
-
- ### this is not supported, e.g. mixed float32/float64 blocks ###
- #df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['float32'])
- #df1['float64'] = 1.0
- #self.store.append('df_mixed_dtypes2', df1)
- #assert df1.dtypes == self.store['df_mixed_dtypes2'].dtypes).all() == True
+
+ with ensure_clean(self.path) as store:
+ df1 = DataFrame({'a': [1, 2, 3]}, dtype='f8')
+ store.append('df_f8', df1)
+ assert df1.dtypes == store['df_f8'].dtypes
+
+ df2 = DataFrame({'a': [1, 2, 3]}, dtype='i8')
+ store.append('df_i8', df2)
+ assert df2.dtypes == store['df_i8'].dtypes
+
+ # incompatible dtype
+ self.assertRaises(Exception, store.append, 'df_i8', df1)
+
+            # check creation/storage/retrieval of float32 (a bit hacky to actually create them though)
+ df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['A'])
+ store.append('df_f4', df1)
+ assert df1.dtypes == store['df_f4'].dtypes
+ assert df1.dtypes[0] == 'float32'
+
+ # check with mixed dtypes (but not multi float types)
+ df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['float32'])
+ df1['string'] = 'foo'
+ store.append('df_mixed_dtypes1', df1)
+ assert (df1.dtypes == store['df_mixed_dtypes1'].dtypes).all() == True
+ assert df1.dtypes[0] == 'float32'
+ assert df1.dtypes[1] == 'object'
+
+ ### this is not supported, e.g. mixed float32/float64 blocks ###
+ #df1 = DataFrame(np.array([[1],[2],[3]],dtype='f4'),columns = ['float32'])
+ #df1['float64'] = 1.0
+ #store.append('df_mixed_dtypes2', df1)
+ #assert df1.dtypes == store['df_mixed_dtypes2'].dtypes).all() == True
def test_table_mixed_dtypes(self):
@@ -926,8 +985,9 @@ def test_table_mixed_dtypes(self):
df.ix[3:6, ['obj1']] = np.nan
df = df.consolidate().convert_objects()
- self.store.append('df1_mixed', df)
- tm.assert_frame_equal(self.store.select('df1_mixed'), df)
+ with ensure_clean(self.path) as store:
+ store.append('df1_mixed', df)
+ tm.assert_frame_equal(store.select('df1_mixed'), df)
# panel
wp = tm.makePanel()
@@ -939,8 +999,9 @@ def test_table_mixed_dtypes(self):
wp['int2'] = 2
wp = wp.consolidate()
- self.store.append('p1_mixed', wp)
- tm.assert_panel_equal(self.store.select('p1_mixed'), wp)
+ with ensure_clean(self.path) as store:
+ store.append('p1_mixed', wp)
+ tm.assert_panel_equal(store.select('p1_mixed'), wp)
# ndim
wp = tm.makePanel4D()
@@ -952,16 +1013,20 @@ def test_table_mixed_dtypes(self):
wp['int2'] = 2
wp = wp.consolidate()
- self.store.append('p4d_mixed', wp)
- tm.assert_panel4d_equal(self.store.select('p4d_mixed'), wp)
+ with ensure_clean(self.path) as store:
+ store.append('p4d_mixed', wp)
+ tm.assert_panel4d_equal(store.select('p4d_mixed'), wp)
def test_unimplemented_dtypes_table_columns(self):
- #### currently not supported dtypes ####
- for n, f in [('unicode', u'\u03c3'), ('date', datetime.date(2001, 1, 2))]:
- df = tm.makeDataFrame()
- df[n] = f
- self.assertRaises(
- NotImplementedError, self.store.append, 'df1_%s' % n, df)
+
+ with ensure_clean(self.path) as store:
+
+            #### currently not supported dtypes ####
+ for n, f in [('unicode', u'\u03c3'), ('date', datetime.date(2001, 1, 2))]:
+ df = tm.makeDataFrame()
+ df[n] = f
+ self.assertRaises(
+ NotImplementedError, store.append, 'df1_%s' % n, df)
# frame
df = tm.makeDataFrame()
@@ -970,271 +1035,288 @@ def test_unimplemented_dtypes_table_columns(self):
df['datetime1'] = datetime.date(2001, 1, 2)
df = df.consolidate().convert_objects()
- # this fails because we have a date in the object block......
- self.assertRaises(Exception, self.store.append, 'df_unimplemented', df)
+ with ensure_clean(self.path) as store:
+            # this fails because we have a date in the object block ...
+ self.assertRaises(Exception, store.append, 'df_unimplemented', df)
def test_remove(self):
- ts = tm.makeTimeSeries()
- df = tm.makeDataFrame()
- self.store['a'] = ts
- self.store['b'] = df
- self.store.remove('a')
- self.assertEquals(len(self.store), 1)
- tm.assert_frame_equal(df, self.store['b'])
-
- self.store.remove('b')
- self.assertEquals(len(self.store), 0)
-
- # pathing
- self.store['a'] = ts
- self.store['b/foo'] = df
- self.store.remove('foo')
- self.store.remove('b/foo')
- self.assertEquals(len(self.store), 1)
-
- self.store['a'] = ts
- self.store['b/foo'] = df
- self.store.remove('b')
- self.assertEquals(len(self.store), 1)
-
- # __delitem__
- self.store['a'] = ts
- self.store['b'] = df
- del self.store['a']
- del self.store['b']
- self.assertEquals(len(self.store), 0)
- def test_remove_where(self):
+ with ensure_clean(self.path) as store:
- # non-existance
- crit1 = Term('index', '>', 'foo')
- self.store.remove('a', where=[crit1])
+ ts = tm.makeTimeSeries()
+ df = tm.makeDataFrame()
+ store['a'] = ts
+ store['b'] = df
+ store.remove('a')
+ self.assertEquals(len(store), 1)
+ tm.assert_frame_equal(df, store['b'])
+
+ store.remove('b')
+ self.assertEquals(len(store), 0)
+
+ # pathing
+ store['a'] = ts
+ store['b/foo'] = df
+ store.remove('foo')
+ store.remove('b/foo')
+ self.assertEquals(len(store), 1)
+
+ store['a'] = ts
+ store['b/foo'] = df
+ store.remove('b')
+ self.assertEquals(len(store), 1)
+
+ # __delitem__
+ store['a'] = ts
+ store['b'] = df
+ del store['a']
+ del store['b']
+ self.assertEquals(len(store), 0)
- # try to remove non-table (with crit)
- # non-table ok (where = None)
- wp = tm.makePanel()
- self.store.put('wp', wp, table=True)
- self.store.remove('wp', [('minor_axis', ['A', 'D'])])
- rs = self.store.select('wp')
- expected = wp.reindex(minor_axis=['B', 'C'])
- tm.assert_panel_equal(rs, expected)
-
- # empty where
- self.store.remove('wp')
- self.store.put('wp', wp, table=True)
-
- # deleted number (entire table)
- n = self.store.remove('wp', [])
- assert(n == 120)
-
- # non - empty where
- self.store.remove('wp')
- self.store.put('wp', wp, table=True)
- self.assertRaises(Exception, self.store.remove,
- 'wp', ['foo'])
-
- # selectin non-table with a where
- # self.store.put('wp2', wp, table=False)
- # self.assertRaises(Exception, self.store.remove,
- # 'wp2', [('column', ['A', 'D'])])
+ def test_remove_where(self):
+
+ with ensure_clean(self.path) as store:
+
+            # non-existence: removing a missing key with a where criterion is a no-op
+ crit1 = Term('index', '>', 'foo')
+ store.remove('a', where=[crit1])
+
+ # try to remove non-table (with crit)
+ # non-table ok (where = None)
+ wp = tm.makePanel()
+ store.put('wp', wp, table=True)
+ store.remove('wp', [('minor_axis', ['A', 'D'])])
+ rs = store.select('wp')
+ expected = wp.reindex(minor_axis=['B', 'C'])
+ tm.assert_panel_equal(rs, expected)
+
+ # empty where
+ store.remove('wp')
+ store.put('wp', wp, table=True)
+
+ # deleted number (entire table)
+ n = store.remove('wp', [])
+ assert(n == 120)
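+            # (a panel table stores one row per (major_axis, minor_axis) pair:
+            # 30 dates x 4 columns = 120)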
+
+ # non - empty where
+ store.remove('wp')
+ store.put('wp', wp, table=True)
+ self.assertRaises(Exception, store.remove,
+ 'wp', ['foo'])
+
+            # selecting a non-table with a where
+ # store.put('wp2', wp, table=False)
+ # self.assertRaises(Exception, store.remove,
+ # 'wp2', [('column', ['A', 'D'])])
def test_remove_crit(self):
- wp = tm.makePanel()
- # group row removal
- date4 = wp.major_axis.take([0, 1, 2, 4, 5, 6, 8, 9, 10])
- crit4 = Term('major_axis', date4)
- self.store.put('wp3', wp, table=True)
- n = self.store.remove('wp3', where=[crit4])
- assert(n == 36)
- result = self.store.select('wp3')
- expected = wp.reindex(major_axis=wp.major_axis - date4)
- tm.assert_panel_equal(result, expected)
-
- # upper half
- self.store.put('wp', wp, table=True)
- date = wp.major_axis[len(wp.major_axis) // 2]
-
- crit1 = Term('major_axis', '>', date)
- crit2 = Term('minor_axis', ['A', 'D'])
- n = self.store.remove('wp', where=[crit1])
-
- assert(n == 56)
-
- n = self.store.remove('wp', where=[crit2])
- assert(n == 32)
-
- result = self.store['wp']
- expected = wp.truncate(after=date).reindex(minor=['B', 'C'])
- tm.assert_panel_equal(result, expected)
-
- # individual row elements
- self.store.put('wp2', wp, table=True)
-
- date1 = wp.major_axis[1:3]
- crit1 = Term('major_axis', date1)
- self.store.remove('wp2', where=[crit1])
- result = self.store.select('wp2')
- expected = wp.reindex(major_axis=wp.major_axis - date1)
- tm.assert_panel_equal(result, expected)
-
- date2 = wp.major_axis[5]
- crit2 = Term('major_axis', date2)
- self.store.remove('wp2', where=[crit2])
- result = self.store['wp2']
- expected = wp.reindex(
- major_axis=wp.major_axis - date1 - Index([date2]))
- tm.assert_panel_equal(result, expected)
-
- date3 = [wp.major_axis[7], wp.major_axis[9]]
- crit3 = Term('major_axis', date3)
- self.store.remove('wp2', where=[crit3])
- result = self.store['wp2']
- expected = wp.reindex(
- major_axis=wp.major_axis - date1 - Index([date2]) - Index(date3))
- tm.assert_panel_equal(result, expected)
-
- # corners
- self.store.put('wp4', wp, table=True)
- n = self.store.remove(
- 'wp4', where=[Term('major_axis', '>', wp.major_axis[-1])])
- result = self.store.select('wp4')
- tm.assert_panel_equal(result, wp)
+ with ensure_clean(self.path) as store:
+
+ wp = tm.makePanel()
+
+ # group row removal
+ date4 = wp.major_axis.take([0, 1, 2, 4, 5, 6, 8, 9, 10])
+ crit4 = Term('major_axis', date4)
+ store.put('wp3', wp, table=True)
+ n = store.remove('wp3', where=[crit4])
+ assert(n == 36)
+ result = store.select('wp3')
+ expected = wp.reindex(major_axis=wp.major_axis - date4)
+ tm.assert_panel_equal(result, expected)
+
+ # upper half
+ store.put('wp', wp, table=True)
+ date = wp.major_axis[len(wp.major_axis) // 2]
+
+ crit1 = Term('major_axis', '>', date)
+ crit2 = Term('minor_axis', ['A', 'D'])
+ n = store.remove('wp', where=[crit1])
+
+ assert(n == 56)
+
+ n = store.remove('wp', where=[crit2])
+ assert(n == 32)
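+            # (14 dates past the midpoint x 4 minor-axis columns = 56 rows, then
+            # 16 remaining dates x 2 columns ('A', 'D') = 32 rows)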
+
+ result = store['wp']
+ expected = wp.truncate(after=date).reindex(minor=['B', 'C'])
+ tm.assert_panel_equal(result, expected)
+
+ # individual row elements
+ store.put('wp2', wp, table=True)
+
+ date1 = wp.major_axis[1:3]
+ crit1 = Term('major_axis', date1)
+ store.remove('wp2', where=[crit1])
+ result = store.select('wp2')
+ expected = wp.reindex(major_axis=wp.major_axis - date1)
+ tm.assert_panel_equal(result, expected)
+
+ date2 = wp.major_axis[5]
+ crit2 = Term('major_axis', date2)
+ store.remove('wp2', where=[crit2])
+ result = store['wp2']
+ expected = wp.reindex(
+ major_axis=wp.major_axis - date1 - Index([date2]))
+ tm.assert_panel_equal(result, expected)
+
+ date3 = [wp.major_axis[7], wp.major_axis[9]]
+ crit3 = Term('major_axis', date3)
+ store.remove('wp2', where=[crit3])
+ result = store['wp2']
+ expected = wp.reindex(
+ major_axis=wp.major_axis - date1 - Index([date2]) - Index(date3))
+ tm.assert_panel_equal(result, expected)
+
+ # corners
+ store.put('wp4', wp, table=True)
+ n = store.remove(
+ 'wp4', where=[Term('major_axis', '>', wp.major_axis[-1])])
+ result = store.select('wp4')
+ tm.assert_panel_equal(result, wp)
def test_terms(self):
- wp = tm.makePanel()
- p4d = tm.makePanel4D()
- self.store.put('wp', wp, table=True)
- self.store.put('p4d', p4d, table=True)
-
- # some invalid terms
- terms = [
- ['minor', ['A', 'B']],
- ['index', ['20121114']],
- ['index', ['20121114', '20121114']],
- ]
- for t in terms:
- self.assertRaises(Exception, self.store.select, 'wp', t)
-
- self.assertRaises(Exception, Term.__init__)
- self.assertRaises(Exception, Term.__init__, 'blah')
- self.assertRaises(Exception, Term.__init__, 'index')
- self.assertRaises(Exception, Term.__init__, 'index', '==')
- self.assertRaises(Exception, Term.__init__, 'index', '>', 5)
-
- # panel
- result = self.store.select('wp', [Term(
- 'major_axis<20000108'), Term('minor_axis', '=', ['A', 'B'])])
- expected = wp.truncate(after='20000108').reindex(minor=['A', 'B'])
- tm.assert_panel_equal(result, expected)
-
- # p4d
- result = self.store.select('p4d', [Term('major_axis<20000108'),
- Term('minor_axis', '=', ['A', 'B']),
- Term('items', '=', ['ItemA', 'ItemB'])])
- expected = p4d.truncate(after='20000108').reindex(
- minor=['A', 'B'], items=['ItemA', 'ItemB'])
- tm.assert_panel4d_equal(result, expected)
-
- # valid terms
- terms = [
- dict(field='major_axis', op='>', value='20121114'),
- ('major_axis', '20121114'),
- ('major_axis', '>', '20121114'),
- (('major_axis', ['20121114', '20121114']),),
- ('major_axis', datetime.datetime(2012, 11, 14)),
- 'major_axis> 20121114',
- 'major_axis >20121114',
- 'major_axis > 20121114',
- (('minor_axis', ['A', 'B']),),
- (('minor_axis', ['A', 'B']),),
- ((('minor_axis', ['A', 'B']),),),
- (('items', ['ItemA', 'ItemB']),),
- ('items=ItemA'),
- ]
-
- for t in terms:
- self.store.select('wp', t)
- self.store.select('p4d', t)
-
- # valid for p4d only
- terms = [
- (('labels', '=', ['l1', 'l2']),),
- Term('labels', '=', ['l1', 'l2']),
- ]
-
- for t in terms:
- self.store.select('p4d', t)
+ with ensure_clean(self.path) as store:
+
+ wp = tm.makePanel()
+ p4d = tm.makePanel4D()
+ store.put('wp', wp, table=True)
+ store.put('p4d', p4d, table=True)
+
+ # some invalid terms
+ terms = [
+ ['minor', ['A', 'B']],
+ ['index', ['20121114']],
+ ['index', ['20121114', '20121114']],
+ ]
+ for t in terms:
+ self.assertRaises(Exception, store.select, 'wp', t)
+
+ self.assertRaises(Exception, Term.__init__)
+ self.assertRaises(Exception, Term.__init__, 'blah')
+ self.assertRaises(Exception, Term.__init__, 'index')
+ self.assertRaises(Exception, Term.__init__, 'index', '==')
+ self.assertRaises(Exception, Term.__init__, 'index', '>', 5)
+
+ # panel
+ result = store.select('wp', [Term(
+ 'major_axis<20000108'), Term('minor_axis', '=', ['A', 'B'])])
+ expected = wp.truncate(after='20000108').reindex(minor=['A', 'B'])
+ tm.assert_panel_equal(result, expected)
+
+ # p4d
+ result = store.select('p4d', [Term('major_axis<20000108'),
+ Term('minor_axis', '=', ['A', 'B']),
+ Term('items', '=', ['ItemA', 'ItemB'])])
+ expected = p4d.truncate(after='20000108').reindex(
+ minor=['A', 'B'], items=['ItemA', 'ItemB'])
+ tm.assert_panel4d_equal(result, expected)
+
+ # valid terms
+ terms = [
+ dict(field='major_axis', op='>', value='20121114'),
+ ('major_axis', '20121114'),
+ ('major_axis', '>', '20121114'),
+ (('major_axis', ['20121114', '20121114']),),
+ ('major_axis', datetime.datetime(2012, 11, 14)),
+ 'major_axis> 20121114',
+ 'major_axis >20121114',
+ 'major_axis > 20121114',
+ (('minor_axis', ['A', 'B']),),
+ (('minor_axis', ['A', 'B']),),
+ ((('minor_axis', ['A', 'B']),),),
+ (('items', ['ItemA', 'ItemB']),),
+ ('items=ItemA'),
+ ]
+
+ for t in terms:
+ store.select('wp', t)
+ store.select('p4d', t)
+
+ # valid for p4d only
+ terms = [
+ (('labels', '=', ['l1', 'l2']),),
+ Term('labels', '=', ['l1', 'l2']),
+ ]
+
+ for t in terms:
+ store.select('p4d', t)
def test_series(self):
+
s = tm.makeStringSeries()
self._check_roundtrip(s, tm.assert_series_equal)
-
+
ts = tm.makeTimeSeries()
self._check_roundtrip(ts, tm.assert_series_equal)
-
+
ts2 = Series(ts.index, Index(ts.index, dtype=object))
self._check_roundtrip(ts2, tm.assert_series_equal)
-
+
ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object),
dtype=object))
self._check_roundtrip(ts3, tm.assert_series_equal)
-
+
def test_sparse_series(self):
+
s = tm.makeStringSeries()
s[3:5] = np.nan
ss = s.to_sparse()
self._check_roundtrip(ss, tm.assert_series_equal,
check_series_type=True)
-
+
ss2 = s.to_sparse(kind='integer')
self._check_roundtrip(ss2, tm.assert_series_equal,
check_series_type=True)
-
+
ss3 = s.to_sparse(fill_value=0)
self._check_roundtrip(ss3, tm.assert_series_equal,
check_series_type=True)
def test_sparse_frame(self):
+
s = tm.makeDataFrame()
s.ix[3:5, 1:3] = np.nan
s.ix[8:10, -2] = np.nan
ss = s.to_sparse()
self._check_double_roundtrip(ss, tm.assert_frame_equal,
check_frame_type=True)
-
+
ss2 = s.to_sparse(kind='integer')
self._check_double_roundtrip(ss2, tm.assert_frame_equal,
check_frame_type=True)
-
+
ss3 = s.to_sparse(fill_value=0)
self._check_double_roundtrip(ss3, tm.assert_frame_equal,
check_frame_type=True)
-
+
def test_sparse_panel(self):
+
items = ['x', 'y', 'z']
p = Panel(dict((i, tm.makeDataFrame().ix[:2, :2]) for i in items))
sp = p.to_sparse()
-
+
self._check_double_roundtrip(sp, tm.assert_panel_equal,
check_panel_type=True)
-
+
sp2 = p.to_sparse(kind='integer')
self._check_double_roundtrip(sp2, tm.assert_panel_equal,
check_panel_type=True)
-
+
sp3 = p.to_sparse(fill_value=0)
self._check_double_roundtrip(sp3, tm.assert_panel_equal,
check_panel_type=True)
def test_float_index(self):
+
# GH #454
index = np.random.randn(10)
s = Series(np.random.randn(10), index=index)
self._check_roundtrip(s, tm.assert_series_equal)
-
+
def test_tuple_index(self):
+
# GH #492
col = np.arange(10)
idx = [(0., 1.), (2., 3.), (4., 5.)]
@@ -1243,8 +1325,9 @@ def test_tuple_index(self):
warnings.filterwarnings('ignore', category=PerformanceWarning)
self._check_roundtrip(DF, tm.assert_frame_equal)
warnings.filterwarnings('always', category=PerformanceWarning)
-
+
def test_index_types(self):
+
values = np.random.randn(2)
func = lambda l, r: tm.assert_series_equal(l, r, True, True, True)
@@ -1253,45 +1336,47 @@ def test_index_types(self):
ser = Series(values, [0, 'y'])
self._check_roundtrip(ser, func)
warnings.filterwarnings('always', category=PerformanceWarning)
-
+
ser = Series(values, [datetime.datetime.today(), 0])
self._check_roundtrip(ser, func)
-
+
ser = Series(values, ['y', 0])
self._check_roundtrip(ser, func)
-
+
warnings.filterwarnings('ignore', category=PerformanceWarning)
ser = Series(values, [datetime.date.today(), 'a'])
self._check_roundtrip(ser, func)
warnings.filterwarnings('always', category=PerformanceWarning)
-
+
warnings.filterwarnings('ignore', category=PerformanceWarning)
ser = Series(values, [1.23, 'b'])
self._check_roundtrip(ser, func)
warnings.filterwarnings('always', category=PerformanceWarning)
-
+
ser = Series(values, [1, 1.53])
self._check_roundtrip(ser, func)
-
+
ser = Series(values, [1, 5])
self._check_roundtrip(ser, func)
-
+
ser = Series(values, [datetime.datetime(
- 2012, 1, 1), datetime.datetime(2012, 1, 2)])
+ 2012, 1, 1), datetime.datetime(2012, 1, 2)])
self._check_roundtrip(ser, func)
-
+
def test_timeseries_preepoch(self):
+
if sys.version_info[0] == 2 and sys.version_info[1] < 7:
raise nose.SkipTest
-
+
dr = bdate_range('1/1/1940', '1/1/1960')
ts = Series(np.random.randn(len(dr)), index=dr)
try:
self._check_roundtrip(ts, tm.assert_series_equal)
except OverflowError:
raise nose.SkipTest('known failer on some windows platforms')
-
+
def test_frame(self):
+
df = tm.makeDataFrame()
# put in some random NAs
@@ -1300,23 +1385,24 @@ def test_frame(self):
self._check_roundtrip_table(df, tm.assert_frame_equal)
self._check_roundtrip(df, tm.assert_frame_equal)
-
+
self._check_roundtrip_table(df, tm.assert_frame_equal,
compression=True)
self._check_roundtrip(df, tm.assert_frame_equal,
compression=True)
-
+
tdf = tm.makeTimeDataFrame()
self._check_roundtrip(tdf, tm.assert_frame_equal)
self._check_roundtrip(tdf, tm.assert_frame_equal,
compression=True)
-
- # not consolidated
- df['foo'] = np.random.randn(len(df))
- self.store['df'] = df
- recons = self.store['df']
- self.assert_(recons._data.is_consolidated())
-
+
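+        # ensure_clean is assumed to be a test helper that yields a store
+        # on a temporary path and removes the file on exit, replacing the
+        # old scratchpath/try/finally/os.remove idiom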
+ with ensure_clean(self.path) as store:
+ # not consolidated
+ df['foo'] = np.random.randn(len(df))
+ store['df'] = df
+ recons = store['df']
+ self.assert_(recons._data.is_consolidated())
+
# empty
self._check_roundtrip(df[:0], tm.assert_frame_equal)
@@ -1332,37 +1418,33 @@ def test_empty_series_frame(self):
self._check_roundtrip(df0, tm.assert_frame_equal)
self._check_roundtrip(df1, tm.assert_frame_equal)
self._check_roundtrip(df2, tm.assert_frame_equal)
-
+
def test_can_serialize_dates(self):
+
rng = [x.date() for x in bdate_range('1/1/2000', '1/30/2000')]
frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
+
self._check_roundtrip(frame, tm.assert_frame_equal)
def test_timezones(self):
rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern')
frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
- try:
- store = HDFStore(self.scratchpath)
+
+ with ensure_clean(self.path) as store:
store['frame'] = frame
recons = store['frame']
self.assert_(recons.index.equals(rng))
self.assertEquals(rng.tz, recons.index.tz)
- finally:
- store.close()
- os.remove(self.scratchpath)
def test_fixed_offset_tz(self):
rng = date_range('1/1/2000 00:00:00-07:00', '1/30/2000 00:00:00-07:00')
frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
- try:
- store = HDFStore(self.scratchpath)
+
+ with ensure_clean(self.path) as store:
store['frame'] = frame
recons = store['frame']
self.assert_(recons.index.equals(rng))
self.assertEquals(rng.tz, recons.index.tz)
- finally:
- store.close()
- os.remove(self.scratchpath)
def test_store_hierarchical(self):
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
@@ -1378,41 +1460,31 @@ def test_store_hierarchical(self):
self._check_roundtrip(frame['A'], tm.assert_series_equal)
# check that the names are stored
- try:
- store = HDFStore(self.scratchpath)
+ with ensure_clean(self.path) as store:
store['frame'] = frame
recons = store['frame']
assert(recons.index.names == ['foo', 'bar'])
- finally:
- store.close()
- os.remove(self.scratchpath)
def test_store_index_name(self):
df = tm.makeDataFrame()
df.index.name = 'foo'
- try:
- store = HDFStore(self.scratchpath)
+
+ with ensure_clean(self.path) as store:
store['frame'] = df
recons = store['frame']
assert(recons.index.name == 'foo')
- finally:
- store.close()
- os.remove(self.scratchpath)
def test_store_series_name(self):
df = tm.makeDataFrame()
series = df['A']
- try:
- store = HDFStore(self.scratchpath)
+ with ensure_clean(self.path) as store:
store['series'] = series
recons = store['series']
assert(recons.name == 'A')
- finally:
- store.close()
- os.remove(self.scratchpath)
def test_store_mixed(self):
+
def _make_one():
df = tm.makeDataFrame()
df['obj1'] = 'foo'
@@ -1429,16 +1501,17 @@ def _make_one():
self._check_roundtrip(df1, tm.assert_frame_equal)
self._check_roundtrip(df2, tm.assert_frame_equal)
- self.store['obj'] = df1
- tm.assert_frame_equal(self.store['obj'], df1)
- self.store['obj'] = df2
- tm.assert_frame_equal(self.store['obj'], df2)
-
+ with ensure_clean(self.path) as store:
+ store['obj'] = df1
+ tm.assert_frame_equal(store['obj'], df1)
+ store['obj'] = df2
+ tm.assert_frame_equal(store['obj'], df2)
+
# check that can store Series of all of these types
self._check_roundtrip(df1['obj1'], tm.assert_series_equal)
self._check_roundtrip(df1['bool1'], tm.assert_series_equal)
self._check_roundtrip(df1['int1'], tm.assert_series_equal)
-
+
# try with compression
self._check_roundtrip(df1['obj1'], tm.assert_series_equal,
compression=True)
@@ -1450,25 +1523,23 @@ def _make_one():
compression=True)
def test_wide(self):
+
wp = tm.makePanel()
self._check_roundtrip(wp, tm.assert_panel_equal)
def test_wide_table(self):
+
wp = tm.makePanel()
self._check_roundtrip_table(wp, tm.assert_panel_equal)
def test_wide_table_dups(self):
wp = tm.makePanel()
- try:
- store = HDFStore(self.scratchpath)
+ with ensure_clean(self.path) as store:
store._quiet = True
store.put('panel', wp, table=True)
store.put('panel', wp, table=True, append=True)
recons = store['panel']
tm.assert_panel_equal(recons, wp)
- finally:
- store.close()
- os.remove(self.scratchpath)
def test_long(self):
def _check(left, right):
@@ -1484,220 +1555,234 @@ def test_longpanel(self):
pass
def test_overwrite_node(self):
- self.store['a'] = tm.makeTimeDataFrame()
- ts = tm.makeTimeSeries()
- self.store['a'] = ts
- tm.assert_series_equal(self.store['a'], ts)
+ with ensure_clean(self.path) as store:
+ store['a'] = tm.makeTimeDataFrame()
+ ts = tm.makeTimeSeries()
+ store['a'] = ts
+
+ tm.assert_series_equal(store['a'], ts)
def test_select(self):
wp = tm.makePanel()
- # put/select ok
- self.store.remove('wp')
- self.store.put('wp', wp, table=True)
- self.store.select('wp')
-
- # non-table ok (where = None)
- self.store.remove('wp')
- self.store.put('wp2', wp, table=False)
- self.store.select('wp2')
-
- # selection on the non-indexable with a large number of columns
- wp = Panel(
- np.random.randn(100, 100, 100), items=['Item%03d' % i for i in xrange(100)],
- major_axis=date_range('1/1/2000', periods=100), minor_axis=['E%03d' % i for i in xrange(100)])
-
- self.store.remove('wp')
- self.store.append('wp', wp)
- items = ['Item%03d' % i for i in xrange(80)]
- result = self.store.select('wp', Term('items', items))
- expected = wp.reindex(items=items)
- tm.assert_panel_equal(expected, result)
-
- # selectin non-table with a where
- # self.assertRaises(Exception, self.store.select,
- # 'wp2', ('column', ['A', 'D']))
-
- # select with columns=
- df = tm.makeTimeDataFrame()
- self.store.remove('df')
- self.store.append('df', df)
- result = self.store.select('df', columns=['A', 'B'])
- expected = df.reindex(columns=['A', 'B'])
- tm.assert_frame_equal(expected, result)
-
- # equivalentsly
- result = self.store.select('df', [('columns', ['A', 'B'])])
- expected = df.reindex(columns=['A', 'B'])
- tm.assert_frame_equal(expected, result)
-
- # with a data column
- self.store.remove('df')
- self.store.append('df', df, data_columns=['A'])
- result = self.store.select('df', ['A > 0'], columns=['A', 'B'])
- expected = df[df.A > 0].reindex(columns=['A', 'B'])
- tm.assert_frame_equal(expected, result)
-
- # all a data columns
- self.store.remove('df')
- self.store.append('df', df, data_columns=True)
- result = self.store.select('df', ['A > 0'], columns=['A', 'B'])
- expected = df[df.A > 0].reindex(columns=['A', 'B'])
- tm.assert_frame_equal(expected, result)
-
- # with a data column, but different columns
- self.store.remove('df')
- self.store.append('df', df, data_columns=['A'])
- result = self.store.select('df', ['A > 0'], columns=['C', 'D'])
- expected = df[df.A > 0].reindex(columns=['C', 'D'])
- tm.assert_frame_equal(expected, result)
-
- # with a Timestamp data column (GH #2637)
- df = DataFrame(dict(ts=bdate_range('2012-01-01', periods=300), A=np.random.randn(300)))
- self.store.remove('df')
- self.store.append('df', df, data_columns=['ts', 'A'])
- result = self.store.select('df', [Term('ts', '>=', Timestamp('2012-02-01'))])
- expected = df[df.ts >= Timestamp('2012-02-01')]
- tm.assert_frame_equal(expected, result)
+ with ensure_clean(self.path) as store:
+
+ # put/select ok
+ store.remove('wp')
+ store.put('wp', wp, table=True)
+ store.select('wp')
+
+ # non-table ok (where = None)
+ store.remove('wp')
+ store.put('wp2', wp, table=False)
+ store.select('wp2')
+
+ # selection on the non-indexable with a large number of columns
+ wp = Panel(
+ np.random.randn(100, 100, 100), items=['Item%03d' % i for i in xrange(100)],
+ major_axis=date_range('1/1/2000', periods=100), minor_axis=['E%03d' % i for i in xrange(100)])
+
+ store.remove('wp')
+ store.append('wp', wp)
+ items = ['Item%03d' % i for i in xrange(80)]
+ result = store.select('wp', Term('items', items))
+ expected = wp.reindex(items=items)
+ tm.assert_panel_equal(expected, result)
+
+            # selecting a non-table with a where
+ # self.assertRaises(Exception, store.select,
+ # 'wp2', ('column', ['A', 'D']))
+
+ # select with columns=
+ df = tm.makeTimeDataFrame()
+ store.remove('df')
+ store.append('df', df)
+ result = store.select('df', columns=['A', 'B'])
+ expected = df.reindex(columns=['A', 'B'])
+ tm.assert_frame_equal(expected, result)
+
+            # equivalently
+ result = store.select('df', [('columns', ['A', 'B'])])
+ expected = df.reindex(columns=['A', 'B'])
+ tm.assert_frame_equal(expected, result)
+
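+            # columns listed in data_columns= are written as their own
+            # queryable columns on disk, so they can be used in where
+            # criteria such as 'A > 0'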
+ # with a data column
+ store.remove('df')
+ store.append('df', df, data_columns=['A'])
+ result = store.select('df', ['A > 0'], columns=['A', 'B'])
+ expected = df[df.A > 0].reindex(columns=['A', 'B'])
+ tm.assert_frame_equal(expected, result)
+
+            # all columns as data columns
+ store.remove('df')
+ store.append('df', df, data_columns=True)
+ result = store.select('df', ['A > 0'], columns=['A', 'B'])
+ expected = df[df.A > 0].reindex(columns=['A', 'B'])
+ tm.assert_frame_equal(expected, result)
+
+ # with a data column, but different columns
+ store.remove('df')
+ store.append('df', df, data_columns=['A'])
+ result = store.select('df', ['A > 0'], columns=['C', 'D'])
+ expected = df[df.A > 0].reindex(columns=['C', 'D'])
+ tm.assert_frame_equal(expected, result)
+
+ # with a Timestamp data column (GH #2637)
+ df = DataFrame(dict(ts=bdate_range('2012-01-01', periods=300), A=np.random.randn(300)))
+ store.remove('df')
+ store.append('df', df, data_columns=['ts', 'A'])
+ result = store.select('df', [Term('ts', '>=', Timestamp('2012-02-01'))])
+ expected = df[df.ts >= Timestamp('2012-02-01')]
+ tm.assert_frame_equal(expected, result)
def test_panel_select(self):
- wp = tm.makePanel()
- self.store.put('wp', wp, table=True)
- date = wp.major_axis[len(wp.major_axis) // 2]
-
- crit1 = ('major_axis', '>=', date)
- crit2 = ('minor_axis', '=', ['A', 'D'])
- result = self.store.select('wp', [crit1, crit2])
- expected = wp.truncate(before=date).reindex(minor=['A', 'D'])
- tm.assert_panel_equal(result, expected)
-
- result = self.store.select(
- 'wp', ['major_axis>=20000124', ('minor_axis', '=', ['A', 'B'])])
- expected = wp.truncate(before='20000124').reindex(minor=['A', 'B'])
- tm.assert_panel_equal(result, expected)
+ wp = tm.makePanel()
+ with ensure_clean(self.path) as store:
+ store.put('wp', wp, table=True)
+ date = wp.major_axis[len(wp.major_axis) // 2]
+
+ crit1 = ('major_axis', '>=', date)
+ crit2 = ('minor_axis', '=', ['A', 'D'])
+
+ result = store.select('wp', [crit1, crit2])
+ expected = wp.truncate(before=date).reindex(minor=['A', 'D'])
+ tm.assert_panel_equal(result, expected)
+
+ result = store.select(
+ 'wp', ['major_axis>=20000124', ('minor_axis', '=', ['A', 'B'])])
+ expected = wp.truncate(before='20000124').reindex(minor=['A', 'B'])
+ tm.assert_panel_equal(result, expected)
+
def test_frame_select(self):
- df = tm.makeTimeDataFrame()
- self.store.put('frame', df, table=True)
- date = df.index[len(df) // 2]
-
- crit1 = ('index', '>=', date)
- crit2 = ('columns', ['A', 'D'])
- crit3 = ('columns', 'A')
-
- result = self.store.select('frame', [crit1, crit2])
- expected = df.ix[date:, ['A', 'D']]
- tm.assert_frame_equal(result, expected)
-
- result = self.store.select('frame', [crit3])
- expected = df.ix[:, ['A']]
- tm.assert_frame_equal(result, expected)
-
- # other indicies for a frame
-
- # integer
- df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
- self.store.append('df_int', df)
- self.store.select(
- 'df_int', [Term("index<10"), Term("columns", "=", ["A"])])
- df = DataFrame(dict(A=np.random.rand(
- 20), B=np.random.rand(20), index=np.arange(20, dtype='f8')))
- self.store.append('df_float', df)
- self.store.select(
- 'df_float', [Term("index<10.0"), Term("columns", "=", ["A"])])
-
- # invalid terms
df = tm.makeTimeDataFrame()
- self.store.append('df_time', df)
- self.assertRaises(
- Exception, self.store.select, 'df_time', [Term("index>0")])
-
- # can't select if not written as table
- # self.store['frame'] = df
- # self.assertRaises(Exception, self.store.select,
- # 'frame', [crit1, crit2])
+ with ensure_clean(self.path) as store:
+ store.put('frame', df, table=True)
+ date = df.index[len(df) // 2]
+
+ crit1 = ('index', '>=', date)
+ crit2 = ('columns', ['A', 'D'])
+ crit3 = ('columns', 'A')
+
+ result = store.select('frame', [crit1, crit2])
+ expected = df.ix[date:, ['A', 'D']]
+ tm.assert_frame_equal(result, expected)
+
+ result = store.select('frame', [crit3])
+ expected = df.ix[:, ['A']]
+ tm.assert_frame_equal(result, expected)
+
+            # other index types for a frame
+
+ # integer
+ df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
+ store.append('df_int', df)
+ store.select(
+ 'df_int', [Term("index<10"), Term("columns", "=", ["A"])])
+
+ df = DataFrame(dict(A=np.random.rand(
+ 20), B=np.random.rand(20), index=np.arange(20, dtype='f8')))
+ store.append('df_float', df)
+ store.select(
+ 'df_float', [Term("index<10.0"), Term("columns", "=", ["A"])])
+
+ # invalid terms
+ df = tm.makeTimeDataFrame()
+ store.append('df_time', df)
+ self.assertRaises(
+ Exception, store.select, 'df_time', [Term("index>0")])
+
+ # can't select if not written as table
+ # store['frame'] = df
+ # self.assertRaises(Exception, store.select,
+ # 'frame', [crit1, crit2])
+
def test_unique(self):
+
df = tm.makeTimeDataFrame()
def check(x, y):
self.assert_((np.unique(x) == np.unique(y)).all() == True)
- self.store.remove('df')
- self.store.append('df', df)
-
- # error
- self.assertRaises(KeyError, self.store.unique, 'df', 'foo')
-
- # valid
- result = self.store.unique('df', 'index')
- check(result.values, df.index.values)
-
- # not a data indexable column
- self.assertRaises(
- ValueError, self.store.unique, 'df', 'values_block_0')
-
- # a data column
- df2 = df.copy()
- df2['string'] = 'foo'
- self.store.append('df2', df2, data_columns=['string'])
- result = self.store.unique('df2', 'string')
- check(result.values, df2['string'].unique())
-
- # a data column with NaNs, result excludes the NaNs
- df3 = df.copy()
- df3['string'] = 'foo'
- df3.ix[4:6, 'string'] = np.nan
- self.store.append('df3', df3, data_columns=['string'])
- result = self.store.unique('df3', 'string')
- check(result.values, df3['string'].valid().unique())
+ with ensure_clean(self.path) as store:
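+            # unique(key, column) reads the distinct values of an on-disk
+            # indexable or data column; other columns should raise (below)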
+ store.remove('df')
+ store.append('df', df)
+
+ # error
+ self.assertRaises(KeyError, store.unique, 'df', 'foo')
+
+ # valid
+ result = store.unique('df', 'index')
+ check(result.values, df.index.values)
+
+ # not a data indexable column
+ self.assertRaises(
+ ValueError, store.unique, 'df', 'values_block_0')
+
+ # a data column
+ df2 = df.copy()
+ df2['string'] = 'foo'
+ store.append('df2', df2, data_columns=['string'])
+ result = store.unique('df2', 'string')
+ check(result.values, df2['string'].unique())
+
+ # a data column with NaNs, result excludes the NaNs
+ df3 = df.copy()
+ df3['string'] = 'foo'
+ df3.ix[4:6, 'string'] = np.nan
+ store.append('df3', df3, data_columns=['string'])
+ result = store.unique('df3', 'string')
+ check(result.values, df3['string'].valid().unique())
def test_coordinates(self):
df = tm.makeTimeDataFrame()
- self.store.remove('df')
- self.store.append('df', df)
-
- # all
- c = self.store.select_as_coordinates('df')
- assert((c.values == np.arange(len(df.index))).all() == True)
-
- # get coordinates back & test vs frame
- self.store.remove('df')
-
- df = DataFrame(dict(A=range(5), B=range(5)))
- self.store.append('df', df)
- c = self.store.select_as_coordinates('df', ['index<3'])
- assert((c.values == np.arange(3)).all() == True)
- result = self.store.select('df', where=c)
- expected = df.ix[0:2, :]
- tm.assert_frame_equal(result, expected)
-
- c = self.store.select_as_coordinates('df', ['index>=3', 'index<=4'])
- assert((c.values == np.arange(2) + 3).all() == True)
- result = self.store.select('df', where=c)
- expected = df.ix[3:4, :]
- tm.assert_frame_equal(result, expected)
-
- # multiple tables
- self.store.remove('df1')
- self.store.remove('df2')
- df1 = tm.makeTimeDataFrame()
- df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
- self.store.append('df1', df1, data_columns=['A', 'B'])
- self.store.append('df2', df2)
-
- c = self.store.select_as_coordinates('df1', ['A>0', 'B>0'])
- df1_result = self.store.select('df1', c)
- df2_result = self.store.select('df2', c)
- result = concat([df1_result, df2_result], axis=1)
-        expected = concat([df1, df2], axis=1)
-        expected = expected[(expected.A > 0) & (expected.B > 0)]
-        tm.assert_frame_equal(result, expected)
+        with ensure_clean(self.path) as store:
+ store.remove('df')
+ store.append('df', df)
+
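+            # select_as_coordinates returns the integer row locations
+            # matching the criteria; the result can be passed back as the
+            # where= of a later select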
+ # all
+ c = store.select_as_coordinates('df')
+ assert((c.values == np.arange(len(df.index))).all() == True)
+
+ # get coordinates back & test vs frame
+ store.remove('df')
+
+ df = DataFrame(dict(A=range(5), B=range(5)))
+ store.append('df', df)
+ c = store.select_as_coordinates('df', ['index<3'])
+ assert((c.values == np.arange(3)).all() == True)
+ result = store.select('df', where=c)
+ expected = df.ix[0:2, :]
+ tm.assert_frame_equal(result, expected)
+
+ c = store.select_as_coordinates('df', ['index>=3', 'index<=4'])
+ assert((c.values == np.arange(2) + 3).all() == True)
+ result = store.select('df', where=c)
+ expected = df.ix[3:4, :]
+ tm.assert_frame_equal(result, expected)
+
+ # multiple tables
+ store.remove('df1')
+ store.remove('df2')
+ df1 = tm.makeTimeDataFrame()
+ df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
+ store.append('df1', df1, data_columns=['A', 'B'])
+ store.append('df2', df2)
+
+ c = store.select_as_coordinates('df1', ['A>0', 'B>0'])
+ df1_result = store.select('df1', c)
+ df2_result = store.select('df2', c)
+ result = concat([df1_result, df2_result], axis=1)
+
+ expected = concat([df1, df2], axis=1)
+ expected = expected[(expected.A > 0) & (expected.B > 0)]
+ tm.assert_frame_equal(result, expected)
def test_append_to_multiple(self):
df1 = tm.makeTimeDataFrame()
@@ -1705,102 +1790,109 @@ def test_append_to_multiple(self):
df2['foo'] = 'bar'
df = concat([df1, df2], axis=1)
- # exceptions
- self.assertRaises(Exception, self.store.append_to_multiple, {'df1':
- ['A', 'B'], 'df2': None}, df, selector='df3')
- self.assertRaises(Exception, self.store.append_to_multiple,
- {'df1': None, 'df2': None}, df, selector='df3')
- self.assertRaises(
- Exception, self.store.append_to_multiple, 'df1', df, 'df1')
-
- # regular operation
- self.store.append_to_multiple(
- {'df1': ['A', 'B'], 'df2': None}, df, selector='df1')
- result = self.store.select_as_multiple(
- ['df1', 'df2'], where=['A>0', 'B>0'], selector='df1')
- expected = df[(df.A > 0) & (df.B > 0)]
- tm.assert_frame_equal(result, expected)
+ with ensure_clean(self.path) as store:
+ # exceptions
+ self.assertRaises(Exception, store.append_to_multiple,
+ {'df1': ['A', 'B'], 'df2': None}, df, selector='df3')
+ self.assertRaises(Exception, store.append_to_multiple,
+ {'df1': None, 'df2': None}, df, selector='df3')
+ self.assertRaises(
+ Exception, store.append_to_multiple, 'df1', df, 'df1')
+
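+            # append_to_multiple splits the frame across the listed tables,
+            # keeping the queryable columns in the selector table ('df1');
+            # select_as_multiple queries the selector, then uses the
+            # matching row coordinates to read from every table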
+ # regular operation
+ store.append_to_multiple(
+ {'df1': ['A', 'B'], 'df2': None}, df, selector='df1')
+ result = store.select_as_multiple(
+ ['df1', 'df2'], where=['A>0', 'B>0'], selector='df1')
+ expected = df[(df.A > 0) & (df.B > 0)]
+ tm.assert_frame_equal(result, expected)
+
def test_select_as_multiple(self):
+
df1 = tm.makeTimeDataFrame()
df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
df2['foo'] = 'bar'
- self.store.append('df1', df1, data_columns=['A', 'B'])
- self.store.append('df2', df2)
-
- # exceptions
- self.assertRaises(Exception, self.store.select_as_multiple,
- None, where=['A>0', 'B>0'], selector='df1')
- self.assertRaises(Exception, self.store.select_as_multiple,
- [None], where=['A>0', 'B>0'], selector='df1')
-
- # default select
- result = self.store.select('df1', ['A>0', 'B>0'])
- expected = self.store.select_as_multiple(
- ['df1'], where=['A>0', 'B>0'], selector='df1')
- tm.assert_frame_equal(result, expected)
- expected = self.store.select_as_multiple(
- 'df1', where=['A>0', 'B>0'], selector='df1')
- tm.assert_frame_equal(result, expected)
-
- # multiple
- result = self.store.select_as_multiple(
- ['df1', 'df2'], where=['A>0', 'B>0'], selector='df1')
- expected = concat([df1, df2], axis=1)
- expected = expected[(expected.A > 0) & (expected.B > 0)]
- tm.assert_frame_equal(result, expected)
-
- # multiple (diff selector)
- result = self.store.select_as_multiple(['df1', 'df2'], where=[Term(
- 'index', '>', df2.index[4])], selector='df2')
- expected = concat([df1, df2], axis=1)
- expected = expected[5:]
- tm.assert_frame_equal(result, expected)
-
- # test excpection for diff rows
- self.store.append('df3', tm.makeTimeDataFrame(nper=50))
- self.assertRaises(Exception, self.store.select_as_multiple, ['df1',
- 'df3'], where=['A>0', 'B>0'], selector='df1')
- def test_start_stop(self):
+ with ensure_clean(self.path) as store:
+ store.append('df1', df1, data_columns=['A', 'B'])
+ store.append('df2', df2)
+
+ # exceptions
+ self.assertRaises(Exception, store.select_as_multiple,
+ None, where=['A>0', 'B>0'], selector='df1')
+ self.assertRaises(Exception, store.select_as_multiple,
+ [None], where=['A>0', 'B>0'], selector='df1')
+
+ # default select
+ result = store.select('df1', ['A>0', 'B>0'])
+ expected = store.select_as_multiple(
+ ['df1'], where=['A>0', 'B>0'], selector='df1')
+ tm.assert_frame_equal(result, expected)
+ expected = store.select_as_multiple(
+ 'df1', where=['A>0', 'B>0'], selector='df1')
+ tm.assert_frame_equal(result, expected)
+
+ # multiple
+ result = store.select_as_multiple(
+ ['df1', 'df2'], where=['A>0', 'B>0'], selector='df1')
+ expected = concat([df1, df2], axis=1)
+ expected = expected[(expected.A > 0) & (expected.B > 0)]
+ tm.assert_frame_equal(result, expected)
+
+ # multiple (diff selector)
+ result = store.select_as_multiple(['df1', 'df2'], where=[Term(
+ 'index', '>', df2.index[4])], selector='df2')
+ expected = concat([df1, df2], axis=1)
+ expected = expected[5:]
+ tm.assert_frame_equal(result, expected)
+
+            # test exception for tables with a different number of rows
+ store.append('df3', tm.makeTimeDataFrame(nper=50))
+ self.assertRaises(Exception, store.select_as_multiple,
+ ['df1','df3'], where=['A>0', 'B>0'], selector='df1')
-        df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
-        self.store.append('df', df)
-        result = self.store.select(
-            'df', [Term("columns", "=", ["A"])], start=0, stop=5)
-        expected = df.ix[0:4, ['A']]
-        tm.assert_frame_equal(result, expected)
-        # out of range
-        result = self.store.select(
-            'df', [Term("columns", "=", ["A"])], start=30, stop=40)
-        assert(len(result) == 0)
-        assert(type(result) == DataFrame)
+
+    def test_start_stop(self):
+        with ensure_clean(self.path) as store:
+            df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
+            store.append('df', df)
+
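+            # start/stop bound the integer row range read from the table
+            # (stop is exclusive); any where criteria apply within that range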
+ result = store.select(
+ 'df', [Term("columns", "=", ["A"])], start=0, stop=5)
+ expected = df.ix[0:4, ['A']]
+ tm.assert_frame_equal(result, expected)
+
+ # out of range
+ result = store.select(
+ 'df', [Term("columns", "=", ["A"])], start=30, stop=40)
+ assert(len(result) == 0)
+ assert(type(result) == DataFrame)
def test_select_filter_corner(self):
+
df = DataFrame(np.random.randn(50, 100))
df.index = ['%.3d' % c for c in df.index]
df.columns = ['%.3d' % c for c in df.columns]
- self.store.put('frame', df, table=True)
- crit = Term('columns', df.columns[:75])
- result = self.store.select('frame', [crit])
- tm.assert_frame_equal(result, df.ix[:, df.columns[:75]])
+ with ensure_clean(self.path) as store:
+ store.put('frame', df, table=True)
+ crit = Term('columns', df.columns[:75])
+ result = store.select('frame', [crit])
+ tm.assert_frame_equal(result, df.ix[:, df.columns[:75]])
+
def _check_roundtrip(self, obj, comparator, compression=False, **kwargs):
+
options = {}
if compression:
options['complib'] = _default_compressor
- store = HDFStore(self.scratchpath, 'w', **options)
- try:
+ with ensure_clean(self.path, 'w', **options) as store:
store['obj'] = obj
retrieved = store['obj']
comparator(retrieved, obj, **kwargs)
- finally:
- store.close()
- os.remove(self.scratchpath)
def _check_double_roundtrip(self, obj, comparator, compression=False,
**kwargs):
@@ -1808,84 +1900,90 @@ def _check_double_roundtrip(self, obj, comparator, compression=False,
if compression:
options['complib'] = _default_compressor
- store = HDFStore(self.scratchpath, 'w', **options)
- try:
+ with ensure_clean(self.path, 'w', **options) as store:
store['obj'] = obj
retrieved = store['obj']
comparator(retrieved, obj, **kwargs)
store['obj'] = retrieved
again = store['obj']
comparator(again, obj, **kwargs)
- finally:
- store.close()
- os.remove(self.scratchpath)
+
def _check_roundtrip_table(self, obj, comparator, compression=False):
options = {}
if compression:
options['complib'] = _default_compressor
- store = HDFStore(self.scratchpath, 'w', **options)
- try:
+ with ensure_clean(self.path, 'w', **options) as store:
store.put('obj', obj, table=True)
retrieved = store['obj']
# sorted_obj = _test_sort(obj)
comparator(retrieved, obj)
- finally:
- store.close()
- os.remove(self.scratchpath)
def test_pytables_native_read(self):
pth = curpath()
- store = HDFStore(os.path.join(pth, 'pytables_native.h5'), 'r')
- d2 = store['detector/readout']
- store.close()
- store = HDFStore(os.path.join(pth, 'pytables_native2.h5'), 'r')
- str(store)
- d1 = store['detector']
- store.close()
+
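+        # safe_close/safe_remove are assumed helpers that close the store
+        # or remove the file, ignoring any errors raised during cleanup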
+ try:
+ store = HDFStore(os.path.join(pth, 'pytables_native.h5'), 'r')
+ d2 = store['detector/readout']
+ finally:
+ safe_close(store)
+
+ try:
+ store = HDFStore(os.path.join(pth, 'pytables_native2.h5'), 'r')
+ str(store)
+ d1 = store['detector']
+ finally:
+ safe_close(store)
def test_legacy_read(self):
pth = curpath()
- store = HDFStore(os.path.join(pth, 'legacy.h5'), 'r')
- store['a']
- store['b']
- store['c']
- store['d']
- store.close()
+ try:
+ store = HDFStore(os.path.join(pth, 'legacy.h5'), 'r')
+ store['a']
+ store['b']
+ store['c']
+ store['d']
+ finally:
+ safe_close(store)
def test_legacy_table_read(self):
# legacy table types
pth = curpath()
- store = HDFStore(os.path.join(pth, 'legacy_table.h5'), 'r')
- store.select('df1')
- store.select('df2')
- store.select('wp1')
-
- # force the frame
- store.select('df2', typ='legacy_frame')
-
- # old version warning
- warnings.filterwarnings('ignore', category=IncompatibilityWarning)
- self.assertRaises(
- Exception, store.select, 'wp1', Term('minor_axis', '=', 'B'))
-
- df2 = store.select('df2')
- store.select('df2', Term('index', '>', df2.index[2]))
- warnings.filterwarnings('always', category=IncompatibilityWarning)
-        store.close()
+        try:
+            store = HDFStore(os.path.join(pth, 'legacy_table.h5'), 'r')
+            store.select('df1')
+            store.select('df2')
+            store.select('wp1')
+
+            # force the frame
+            store.select('df2', typ='legacy_frame')
+
+            # old version warning
+            warnings.filterwarnings('ignore', category=IncompatibilityWarning)
+            self.assertRaises(
+                Exception, store.select, 'wp1', Term('minor_axis', '=', 'B'))
+ df2 = store.select('df2')
+ store.select('df2', Term('index', '>', df2.index[2]))
+ warnings.filterwarnings('always', category=IncompatibilityWarning)
+
+ finally:
+ safe_close(store)
def test_legacy_0_10_read(self):
# legacy from 0.10
pth = curpath()
- store = HDFStore(os.path.join(pth, 'legacy_0.10.h5'), 'r')
- for k in store.keys():
- store.select(k)
- store.close()
+ try:
+ store = HDFStore(os.path.join(pth, 'legacy_0.10.h5'), 'r')
+ for k in store.keys():
+ store.select(k)
+ finally:
+ safe_close(store)
def test_copy(self):
pth = curpath()
+
def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs):
try:
import os
@@ -1893,6 +1991,7 @@ def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs):
if f is None:
f = os.path.join(pth, 'legacy_0.10.h5')
+
store = HDFStore(f, 'r')
if new_f is None:
@@ -1920,13 +2019,9 @@ def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs):
except (Exception), detail:
pass
finally:
- store.close()
- tstore.close()
- import os
- try:
- os.remove(new_f)
- except:
- pass
+ safe_close(store)
+ safe_close(tstore)
+ safe_remove(new_f)
do_copy()
do_copy(keys = ['df'])
@@ -1934,18 +2029,19 @@ def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs):
# new table
df = tm.makeDataFrame()
+
try:
- st = HDFStore(self.scratchpath)
+ st = HDFStore(self.path)
st.append('df', df, data_columns = ['A'])
st.close()
- do_copy(f = self.scratchpath)
- do_copy(f = self.scratchpath, propindexes = False)
+ do_copy(f = self.path)
+ do_copy(f = self.path, propindexes = False)
finally:
- import os
- os.remove(self.scratchpath)
+ safe_remove(self.path)
def test_legacy_table_write(self):
raise nose.SkipTest
+
# legacy table types
pth = curpath()
df = tm.makeDataFrame()
@@ -1959,71 +2055,78 @@ def test_legacy_table_write(self):
store.close()
def test_store_datetime_fractional_secs(self):
- dt = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456)
- series = Series([0], [dt])
- self.store['a'] = series
- self.assertEquals(self.store['a'].index[0], dt)
-
- def test_tseries_indices_series(self):
- idx = tm.makeDateIndex(10)
- ser = Series(np.random.randn(len(idx)), idx)
- self.store['a'] = ser
- result = self.store['a']
- assert_series_equal(result, ser)
- self.assertEquals(type(result.index), type(ser.index))
- self.assertEquals(result.index.freq, ser.index.freq)
-        idx = tm.makePeriodIndex(10)
-        ser = Series(np.random.randn(len(idx)), idx)
-        self.store['a'] = ser
-        result = self.store['a']
-        assert_series_equal(result, ser)
-        self.assertEquals(type(result.index), type(ser.index))
-        self.assertEquals(result.index.freq, ser.index.freq)
+        with ensure_clean(self.path) as store:
+            dt = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456)
+            series = Series([0], [dt])
+            store['a'] = series
+            self.assertEquals(store['a'].index[0], dt)
+
+    def test_tseries_indices_series(self):
+        with ensure_clean(self.path) as store:
+ idx = tm.makeDateIndex(10)
+ ser = Series(np.random.randn(len(idx)), idx)
+ store['a'] = ser
+ result = store['a']
+
+ assert_series_equal(result, ser)
+ self.assertEquals(type(result.index), type(ser.index))
+ self.assertEquals(result.index.freq, ser.index.freq)
+
+ idx = tm.makePeriodIndex(10)
+ ser = Series(np.random.randn(len(idx)), idx)
+ store['a'] = ser
+ result = store['a']
+
+ assert_series_equal(result, ser)
+ self.assertEquals(type(result.index), type(ser.index))
+ self.assertEquals(result.index.freq, ser.index.freq)
def test_tseries_indices_frame(self):
- idx = tm.makeDateIndex(10)
- df = DataFrame(np.random.randn(len(idx), 3), index=idx)
- self.store['a'] = df
- result = self.store['a']
-
- assert_frame_equal(result, df)
- self.assertEquals(type(result.index), type(df.index))
- self.assertEquals(result.index.freq, df.index.freq)
-
- idx = tm.makePeriodIndex(10)
- df = DataFrame(np.random.randn(len(idx), 3), idx)
- self.store['a'] = df
- result = self.store['a']
- assert_frame_equal(result, df)
- self.assertEquals(type(result.index), type(df.index))
- self.assertEquals(result.index.freq, df.index.freq)
+ with ensure_clean(self.path) as store:
+ idx = tm.makeDateIndex(10)
+ df = DataFrame(np.random.randn(len(idx), 3), index=idx)
+ store['a'] = df
+ result = store['a']
+
+ assert_frame_equal(result, df)
+ self.assertEquals(type(result.index), type(df.index))
+ self.assertEquals(result.index.freq, df.index.freq)
+
+ idx = tm.makePeriodIndex(10)
+ df = DataFrame(np.random.randn(len(idx), 3), idx)
+ store['a'] = df
+ result = store['a']
+
+ assert_frame_equal(result, df)
+ self.assertEquals(type(result.index), type(df.index))
+ self.assertEquals(result.index.freq, df.index.freq)
def test_unicode_index(self):
+
unicode_values = [u'\u03c3', u'\u03c3\u03c3']
warnings.filterwarnings('ignore', category=PerformanceWarning)
s = Series(np.random.randn(len(unicode_values)), unicode_values)
self._check_roundtrip(s, tm.assert_series_equal)
warnings.filterwarnings('always', category=PerformanceWarning)
-
def test_store_datetime_mixed(self):
+
df = DataFrame(
{'a': [1, 2, 3], 'b': [1., 2., 3.], 'c': ['a', 'b', 'c']})
ts = tm.makeTimeSeries()
df['d'] = ts.index[:3]
self._check_roundtrip(df, tm.assert_frame_equal)
-
+
# def test_cant_write_multiindex_table(self):
# # for now, #1848
# df = DataFrame(np.random.randn(10, 4),
# index=[np.arange(5).repeat(2),
# np.tile(np.arange(2), 5)])
- # self.assertRaises(Exception, self.store.put, 'foo', df, table=True)
+ # self.assertRaises(Exception, store.put, 'foo', df, table=True)
def curpath():