From 19bc38a66d3f62ffa82feff826ea06b8aa8dc97c Mon Sep 17 00:00:00 2001 From: acidburnburn Date: Sat, 10 Mar 2018 15:42:43 +0000 Subject: [PATCH 1/5] improved docstring for DataFrame.to_hdf with description and examples --- pandas/core/generic.py | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a893b2ba1a189..c53c3e2cacf00 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1786,7 +1786,8 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None, index=index) def to_hdf(self, path_or_buf, key, **kwargs): - """Write the contained data to an HDF5 file using HDFStore. + """ + Write the contained data to an HDF5 file using HDFStore. Parameters ---------- @@ -1834,6 +1835,42 @@ def to_hdf(self, path_or_buf, key, **kwargs): If applying compression use the fletcher32 checksum dropna : boolean, default False. If true, ALL nan rows will not be written to store. + + See Also + -------- + DataFrame.to_csv : write out to a csv file. + DataFrame.to_sql : write to a sql table. + DataFrame.to_feather : write out feather-format for DataFrames. + + Examples + -------- + >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, + ... index=['a', 'b', 'c']) + >>> df.to_hdf('data.h5', key='df', mode='w') + + We can append another object to the same file: + + >>> s = pd.Series([1, 2, 3, 4]) + >>> s.to_hdf('data.h5', key='s') + + Reading from HDF file: + + >>> pd.read_hdf('data.h5', 'df') + A B + a 1 4 + b 2 5 + c 3 6 + >>> pd.read_hdf('data.h5', 's')) + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 + + Notes + ----- + Learn more about `Hierarchical Data Format (HDF) + `__. 
""" from pandas.io import pytables From 6d42dd15b64dd9ec647d6f1770ef702d656e7cae Mon Sep 17 00:00:00 2001 From: acidburnburn Date: Sun, 11 Mar 2018 00:34:12 +0000 Subject: [PATCH 2/5] moved Notes and added see also --- pandas/core/generic.py | 72 ++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c53c3e2cacf00..c91ba292cb38d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1789,38 +1789,44 @@ def to_hdf(self, path_or_buf, key, **kwargs): """ Write the contained data to an HDF5 file using HDFStore. + Hierarchical Data Format (HDF) is self-describing, allowing an + application to interpret the structure and contents of a file with + no outside information. One HDF file can hold a mix of related objects + which can be accessed as a group or as individual objects. + + In order to add another :class:`~pandas.DataFrame` or + :class:`~pandas.Series` to an existing HDF file please use append mode + and different a key. + Parameters ---------- - path_or_buf : the path (string) or HDFStore object - key : string - identifier for the group in the store - mode : optional, {'a', 'w', 'r+'}, default 'a' - - ``'w'`` - Write; a new file is created (an existing file with the same - name would be deleted). - ``'a'`` - Append; an existing file is opened for reading and writing, - and if the file does not exist it is created. - ``'r+'`` - It is similar to ``'a'``, but the file must already exist. - format : 'fixed(f)|table(t)', default is 'fixed' - fixed(f) : Fixed format - Fast writing/reading. Not-appendable, nor searchable - table(t) : Table format - Write as a PyTables Table structure which may perform - worse but allow more flexible operations like searching - / selecting subsets of the data + path_or_buf : str or pandas.HDFStore + File path or HDFStore object. + key : str + Identifier for the group in the store. 
+ mode : {'a', 'w', 'r+'}, default is 'a' + Mode to open file: + - ``'w'``: write, a new file is created (an existing file with + the same name would be deleted). + - ``'a'``: append, an existing file is opened for reading and + writing, and if the file does not exist it is created. + - `'r+'`: similar to ``'a'``, but the file must already exist. + format : {'fixed', 'table'}, default is 'fixed' + Possible values: + - fixed: Fixed format. Fast writing/reading. Not-appendable, + nor searchable. + - table: Table format. Write as a PyTables Table structure + which may perform worse but allow more flexible operations + like searching / selecting subsets of the data. append : boolean, default False - For Table formats, append the input data to the existing - data_columns : list of columns, or True, default None + For Table formats, append the input data to the existing. + data_columns : list of columns or True, optional List of columns to create as indexed data columns for on-disk queries, or True to use all columns. By default only the axes of the object are indexed. See `here `__. - Applicable only to format='table'. - complevel : int, 0-9, default None + complevel : {0-9}, optional Specifies a compression level for data. A value of 0 disables compression. complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib' @@ -1832,19 +1838,21 @@ def to_hdf(self, path_or_buf, key, **kwargs): Specifying a compression library which is not available issues a ValueError. fletcher32 : bool, default False - If applying compression use the fletcher32 checksum - dropna : boolean, default False. + If applying compression use the fletcher32 checksum. + dropna : bool, default False If true, ALL nan rows will not be written to store. See Also -------- - DataFrame.to_csv : write out to a csv file. + DataFrame.read_hdf : read from HDF file. + DataFrame.to_parquet : write a DataFrame to the binary parquet format. DataFrame.to_sql : write to a sql table. 
DataFrame.to_feather : write out feather-format for DataFrames. + DataFrame.to_csv : write out to a csv file. Examples -------- - >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, + >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, ... index=['a', 'b', 'c']) >>> df.to_hdf('data.h5', key='df', mode='w') @@ -1860,19 +1868,13 @@ def to_hdf(self, path_or_buf, key, **kwargs): a 1 4 b 2 5 c 3 6 - >>> pd.read_hdf('data.h5', 's')) + >>> pd.read_hdf('data.h5', 's') 0 1 1 2 2 3 3 4 dtype: int64 - - Notes - ----- - Learn more about `Hierarchical Data Format (HDF) - `__. """ - from pandas.io import pytables return pytables.to_hdf(path_or_buf, key, self, **kwargs) From 7781bb519b87ed2db4c5ed39f640e10baa8cdbcf Mon Sep 17 00:00:00 2001 From: acidburnburn Date: Wed, 21 Mar 2018 20:24:59 +0000 Subject: [PATCH 3/5] minor fixes based on PR comments --- pandas/core/generic.py | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c91ba292cb38d..a277a0456e7c2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1798,6 +1798,8 @@ def to_hdf(self, path_or_buf, key, **kwargs): :class:`~pandas.Series` to an existing HDF file please use append mode and different a key. + For more information see the :ref:`user guide `. + Parameters ---------- path_or_buf : str or pandas.HDFStore @@ -1806,18 +1808,24 @@ def to_hdf(self, path_or_buf, key, **kwargs): Identifier for the group in the store. mode : {'a', 'w', 'r+'}, default is 'a' Mode to open file: - - ``'w'``: write, a new file is created (an existing file with - the same name would be deleted). - - ``'a'``: append, an existing file is opened for reading and - writing, and if the file does not exist it is created. - - `'r+'`: similar to ``'a'``, but the file must already exist. 
- format : {'fixed', 'table'}, default is 'fixed' + + - 'w': write, a new file is created (an existing file with + the same name would be deleted). + + - 'a': append, an existing file is opened for reading and + writing, and if the file does not exist it is created. + + - 'r+': similar to 'a', but the file must already exist. + format : {'fixed', 'table'}, default 'fixed' + Possible values: - - fixed: Fixed format. Fast writing/reading. Not-appendable, - nor searchable. - - table: Table format. Write as a PyTables Table structure - which may perform worse but allow more flexible operations - like searching / selecting subsets of the data. + + - 'fixed': Fixed format. Fast writing/reading. Not-appendable, + nor searchable. + + - 'table': Table format. Write as a PyTables Table structure + which may perform worse but allow more flexible operations + like searching / selecting subsets of the data. append : boolean, default False For Table formats, append the input data to the existing. data_columns : list of columns or True, optional @@ -1856,7 +1864,7 @@ def to_hdf(self, path_or_buf, key, **kwargs): ... 
index=['a', 'b', 'c']) >>> df.to_hdf('data.h5', key='df', mode='w') - We can append another object to the same file: + We can add another object to the same file: >>> s = pd.Series([1, 2, 3, 4]) >>> s.to_hdf('data.h5', key='s') @@ -1874,6 +1882,12 @@ def to_hdf(self, path_or_buf, key, **kwargs): 2 3 3 4 dtype: int64 + + Deleting file with data: + + >>> import os + >>> os.remove('data.h5') + """ from pandas.io import pytables return pytables.to_hdf(path_or_buf, key, self, **kwargs) From 429afbe501dc14476805543d692190a8ef1be7da Mon Sep 17 00:00:00 2001 From: acidburnburn Date: Wed, 21 Mar 2018 21:55:12 +0000 Subject: [PATCH 4/5] fixed reference to io section --- pandas/core/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a277a0456e7c2..5137d052fb699 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1796,9 +1796,9 @@ def to_hdf(self, path_or_buf, key, **kwargs): In order to add another :class:`~pandas.DataFrame` or :class:`~pandas.Series` to an existing HDF file please use append mode - and different a key. + and a different a key. - For more information see the :ref:`user guide `. + For more information see the :ref:`user guide `. Parameters ---------- From eebfc397c9bbb71f43f9696f86c1b9d7264fdef0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 22 Mar 2018 10:13:27 +0100 Subject: [PATCH 5/5] small fixup --- pandas/core/generic.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5137d052fb699..efe0b22f3b046 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1794,9 +1794,8 @@ def to_hdf(self, path_or_buf, key, **kwargs): no outside information. One HDF file can hold a mix of related objects which can be accessed as a group or as individual objects. 
- In order to add another :class:`~pandas.DataFrame` or - :class:`~pandas.Series` to an existing HDF file please use append mode - and a different a key. + In order to add another DataFrame or Series to an existing HDF file + please use append mode and a different key. For more information see the :ref:`user guide `. Parameters ---------- path_or_buf : str or pandas.HDFStore File path or HDFStore object. key : str Identifier for the group in the store. - mode : {'a', 'w', 'r+'}, default is 'a' + mode : {'a', 'w', 'r+'}, default 'a' Mode to open file: - 'w': write, a new file is created (an existing file with the same name would be deleted). - - 'a': append, an existing file is opened for reading and writing, and if the file does not exist it is created. - - 'r+': similar to 'a', but the file must already exist. format : {'fixed', 'table'}, default 'fixed' - Possible values: - 'fixed': Fixed format. Fast writing/reading. Not-appendable, nor searchable. - - 'table': Table format. Write as a PyTables Table structure which may perform worse but allow more flexible operations like searching / selecting subsets of the data. - append : boolean, default False + append : bool, default False For Table formats, append the input data to the existing. data_columns : list of columns or True, optional List of columns to create as indexed data columns for on-disk @@ -1852,11 +1847,11 @@ def to_hdf(self, path_or_buf, key, **kwargs): See Also -------- - DataFrame.read_hdf : read from HDF file. - DataFrame.to_parquet : write a DataFrame to the binary parquet format. - DataFrame.to_sql : write to a sql table. - DataFrame.to_feather : write out feather-format for DataFrames. - DataFrame.to_csv : write out to a csv file. + pandas.read_hdf : Read from HDF file. + DataFrame.to_parquet : Write a DataFrame to the binary parquet format. + DataFrame.to_sql : Write to a sql table. + DataFrame.to_feather : Write out feather-format for DataFrames. 
+ DataFrame.to_csv : Write out to a csv file. Examples --------