From 6d25e6c9467aa5d4ce379ba56a537880429f494e Mon Sep 17 00:00:00 2001 From: benman1 Date: Sat, 10 Mar 2018 13:38:01 +0000 Subject: [PATCH 1/8] DOC: update the parquet docstring --- pandas/core/frame.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a66d00fff9714..250b67fecc186 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1697,10 +1697,12 @@ def to_parquet(self, fname, engine='auto', compression='snappy', .. versionadded:: 0.21.0 + Requires either fastparquet or pyarrow libraries. + Parameters ---------- fname : str - string file path + String file path. engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' Parquet library to use. If 'auto', then the option ``io.parquet.engine`` is used. The default ``io.parquet.engine`` @@ -1708,8 +1710,23 @@ def to_parquet(self, fname, engine='auto', compression='snappy', 'pyarrow' is unavailable. compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy' Name of the compression to use. Use ``None`` for no compression. - kwargs - Additional keyword arguments passed to the engine + kwargs : dict + Additional keyword arguments passed to the engine. + + Examples + ---------- + >>> df = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]}) + >>> df.to_parquet('df.parquet.gzip', compression='gzip') + + Returns + ---------- + Nothing. + + See Also + -------- + DataFrame.to_csv : write a csv file. + DataFrame.to_sql : write to a sql table. + DataFrame.to_hdf : write to hdf. 
""" from pandas.io.parquet import to_parquet to_parquet(self, fname, engine, From 14ed668a1535f9a1ee43c30469a8ff9a06dbcdc4 Mon Sep 17 00:00:00 2001 From: benman1 Date: Sat, 10 Mar 2018 14:22:51 +0000 Subject: [PATCH 2/8] update order of sections; return type fixed --- pandas/core/frame.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 250b67fecc186..87f309a1864f6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1697,7 +1697,9 @@ def to_parquet(self, fname, engine='auto', compression='snappy', .. versionadded:: 0.21.0 - Requires either fastparquet or pyarrow libraries. + This function writes the dataframe as a parquet file. You + can choose different parquet backends, and have the option + of compressing. Parameters ---------- @@ -1713,20 +1715,24 @@ def to_parquet(self, fname, engine='auto', compression='snappy', kwargs : dict Additional keyword arguments passed to the engine. - Examples - ---------- - >>> df = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]}) - >>> df.to_parquet('df.parquet.gzip', compression='gzip') - Returns ---------- - Nothing. + None See Also -------- DataFrame.to_csv : write a csv file. DataFrame.to_sql : write to a sql table. DataFrame.to_hdf : write to hdf. + + Notes + ---------- + Requires either fastparquet or pyarrow libraries. 
+ + Examples + ---------- + >>> df = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]}) + >>> df.to_parquet('df.parquet.gzip', compression='gzip') """ from pandas.io.parquet import to_parquet to_parquet(self, fname, engine, From 7b7f8bc6b3cf7bce93a5a56b90c6c300b701d743 Mon Sep 17 00:00:00 2001 From: benman1 Date: Sat, 10 Mar 2018 18:46:04 +0000 Subject: [PATCH 3/8] fix issues as suggested in discussion of pull request --- pandas/core/frame.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 87f309a1864f6..68c53711b6111 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1697,9 +1697,9 @@ def to_parquet(self, fname, engine='auto', compression='snappy', .. versionadded:: 0.21.0 - This function writes the dataframe as a parquet file. You - can choose different parquet backends, and have the option - of compressing. + This function writes the dataframe as a parquet file (see + :func:`pandas.io.parquet.to_parquet`). You can choose different parquet + backends, and have the option of compressing. Parameters ---------- @@ -1713,24 +1713,27 @@ def to_parquet(self, fname, engine='auto', compression='snappy', compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy' Name of the compression to use. Use ``None`` for no compression. kwargs : dict - Additional keyword arguments passed to the engine. + Additional keyword arguments passed to the parquet library. See + the documentation for :func:`pandas.io.parquet.to_parquet` for + complete details. Returns - ---------- + ------- None See Also -------- - DataFrame.to_csv : write a csv file. - DataFrame.to_sql : write to a sql table. - DataFrame.to_hdf : write to hdf. + read_parquet : Read a parquet file. + DataFrame.to_csv : Write a csv file. + DataFrame.to_sql : Write to a sql table. + DataFrame.to_hdf : Write to hdf. Notes - ---------- - Requires either fastparquet or pyarrow libraries. 
+ ----- + This function requires either the fastparquet or pyarrow library. Examples - ---------- + -------- >>> df = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]}) >>> df.to_parquet('df.parquet.gzip', compression='gzip') """ From 7ec853724b4f19c73ace9fd02d5e9a6bec91f627 Mon Sep 17 00:00:00 2001 From: benman1 Date: Sat, 10 Mar 2018 19:03:52 +0000 Subject: [PATCH 4/8] ignore validation error for **kwargs argument as per jorisvandenbossche --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 68c53711b6111..cb9be68da2440 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1712,8 +1712,8 @@ def to_parquet(self, fname, engine='auto', compression='snappy', 'pyarrow' is unavailable. compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy' Name of the compression to use. Use ``None`` for no compression. - kwargs : dict - Additional keyword arguments passed to the parquet library. See + **kwargs + Additional arguments passed to the parquet library. See the documentation for :func:`pandas.io.parquet.to_parquet` for complete details. From 39423cde3cf0f58b0423e762d6e38c01c1c56f56 Mon Sep 17 00:00:00 2001 From: benman1 Date: Sat, 10 Mar 2018 19:45:57 +0000 Subject: [PATCH 5/8] update links as per feedback on pull request --- pandas/core/frame.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cb9be68da2440..5874ecf6e00d4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1697,9 +1697,9 @@ def to_parquet(self, fname, engine='auto', compression='snappy', .. versionadded:: 0.21.0 - This function writes the dataframe as a parquet file (see - :func:`pandas.io.parquet.to_parquet`). You can choose different parquet - backends, and have the option of compressing. + This function writes the dataframe as a `parquet file + <https://parquet.apache.org/>`_. 
You can choose different parquet + backends, and have the option of compression. Parameters ---------- @@ -1714,8 +1714,7 @@ def to_parquet(self, fname, engine='auto', compression='snappy', Name of the compression to use. Use ``None`` for no compression. **kwargs Additional arguments passed to the parquet library. See - the documentation for :func:`pandas.io.parquet.to_parquet` for - complete details. + :ref:`pandas io <io.parquet>` for more details. Returns ------- @@ -1730,7 +1729,7 @@ def to_parquet(self, fname, engine='auto', compression='snappy', Notes ----- - This function requires either the fastparquet or pyarrow library. + This function requires either the `fastparquet <https://pypi.python.org/pypi/fastparquet>`_ or `pyarrow <https://arrow.apache.org/docs/python/>`_ library. Examples -------- From d08f6b2bf231418ef8d3c37845ea43e4e601445d Mon Sep 17 00:00:00 2001 From: benman1 Date: Sat, 10 Mar 2018 19:51:10 +0000 Subject: [PATCH 6/8] fix pep8 long line --- pandas/core/frame.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5874ecf6e00d4..bd3c8a17ae728 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1729,7 +1729,9 @@ def to_parquet(self, fname, engine='auto', compression='snappy', Notes ----- - This function requires either the `fastparquet <https://pypi.python.org/pypi/fastparquet>`_ or `pyarrow <https://arrow.apache.org/docs/python/>`_ library. + This function requires either the `fastparquet + <https://pypi.python.org/pypi/fastparquet>`_ or `pyarrow + <https://arrow.apache.org/docs/python/>`_ library. Examples -------- From b5329d2f540deb8716868c4b5b8907843ad43b24 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 10 Mar 2018 21:05:15 +0100 Subject: [PATCH 7/8] remove return section --- pandas/core/frame.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bd3c8a17ae728..a4a5d4db9555d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1699,7 +1699,8 @@ def to_parquet(self, fname, engine='auto', compression='snappy', This function writes the dataframe as a `parquet file <https://parquet.apache.org/>`_. 
You can choose different parquet - backends, and have the option of compression. + backends, and have the option of compression. See + :ref:`the user guide <io.parquet>` for more details. Parameters ---------- @@ -1716,10 +1717,6 @@ def to_parquet(self, fname, engine='auto', compression='snappy', Additional arguments passed to the parquet library. See :ref:`pandas io <io.parquet>` for more details. - Returns - ------- - None - See Also -------- read_parquet : Read a parquet file. From e65840cf1b7fd3342531c63d96ab4657e8ad5f24 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 12 Mar 2018 12:03:06 +0100 Subject: [PATCH 8/8] Update frame.py --- pandas/core/frame.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a4a5d4db9555d..8151d6ca3b193 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1734,6 +1734,10 @@ def to_parquet(self, fname, engine='auto', compression='snappy', -------- >>> df = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]}) >>> df.to_parquet('df.parquet.gzip', compression='gzip') + >>> pd.read_parquet('df.parquet.gzip') + col1 col2 + 0 1 3 + 1 2 4 """ from pandas.io.parquet import to_parquet to_parquet(self, fname, engine,