diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a893b2ba1a189..efe0b22f3b046 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1786,40 +1786,50 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None, index=index) def to_hdf(self, path_or_buf, key, **kwargs): - """Write the contained data to an HDF5 file using HDFStore. + """ + Write the contained data to an HDF5 file using HDFStore. + + Hierarchical Data Format (HDF) is self-describing, allowing an + application to interpret the structure and contents of a file with + no outside information. One HDF file can hold a mix of related objects + which can be accessed as a group or as individual objects. + + In order to add another DataFrame or Series to an existing HDF file + please use append mode and a different key. + + For more information see the :ref:`user guide <io.hdf5>`. Parameters ---------- - path_or_buf : the path (string) or HDFStore object - key : string - identifier for the group in the store - mode : optional, {'a', 'w', 'r+'}, default 'a' - - ``'w'`` - Write; a new file is created (an existing file with the same - name would be deleted). - ``'a'`` - Append; an existing file is opened for reading and writing, - and if the file does not exist it is created. - ``'r+'`` - It is similar to ``'a'``, but the file must already exist. - format : 'fixed(f)|table(t)', default is 'fixed' - fixed(f) : Fixed format - Fast writing/reading. Not-appendable, nor searchable - table(t) : Table format - Write as a PyTables Table structure which may perform - worse but allow more flexible operations like searching - / selecting subsets of the data - append : boolean, default False - For Table formats, append the input data to the existing - data_columns : list of columns, or True, default None + path_or_buf : str or pandas.HDFStore + File path or HDFStore object. + key : str + Identifier for the group in the store. 
+ mode : {'a', 'w', 'r+'}, default 'a' + Mode to open file: + + - 'w': write, a new file is created (an existing file with + the same name would be deleted). + - 'a': append, an existing file is opened for reading and + writing, and if the file does not exist it is created. + - 'r+': similar to 'a', but the file must already exist. + format : {'fixed', 'table'}, default 'fixed' + Possible values: + + - 'fixed': Fixed format. Fast writing/reading. Not-appendable, + nor searchable. + - 'table': Table format. Write as a PyTables Table structure + which may perform worse but allow more flexible operations + like searching / selecting subsets of the data. + append : bool, default False + For Table formats, append the input data to the existing data. + data_columns : list of columns or True, optional List of columns to create as indexed data columns for on-disk queries, or True to use all columns. By default only the axes of the object are indexed. See `here <http://pandas.pydata.org/pandas-docs/stable/io.html#query-via-data-columns>`__. - Applicable only to format='table'. - complevel : int, 0-9, default None + complevel : {0-9}, optional Specifies a compression level for data. A value of 0 disables compression. complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib' @@ -1831,11 +1841,49 @@ def to_hdf(self, path_or_buf, key, **kwargs): Specifying a compression library which is not available issues a ValueError. fletcher32 : bool, default False - If applying compression use the fletcher32 checksum - dropna : boolean, default False. + If applying compression use the fletcher32 checksum. + dropna : bool, default False If true, ALL nan rows will not be written to store. - """ + See Also + -------- + read_hdf : Read from HDF file. + DataFrame.to_parquet : Write a DataFrame to the binary parquet format. + DataFrame.to_sql : Write to a sql table. + DataFrame.to_feather : Write out feather-format for DataFrames. + DataFrame.to_csv : Write out to a csv file. + + Examples + -------- + >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, + ... 
index=['a', 'b', 'c']) + >>> df.to_hdf('data.h5', key='df', mode='w') + + We can add another object to the same file: + + >>> s = pd.Series([1, 2, 3, 4]) + >>> s.to_hdf('data.h5', key='s') + + Reading from HDF file: + + >>> pd.read_hdf('data.h5', 'df') + A B + a 1 4 + b 2 5 + c 3 6 + >>> pd.read_hdf('data.h5', 's') + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 + + Deleting file with data: + + >>> import os + >>> os.remove('data.h5') + + """ from pandas.io import pytables return pytables.to_hdf(path_or_buf, key, self, **kwargs)