Skip to content

Commit 6a95c81

Browse files
committed
update docs as per review
1 parent 8216486 commit 6a95c81

File tree

5 files changed

+28
-19
lines changed

5 files changed

+28
-19
lines changed

doc/source/io.rst

+7-1
Original file line numberDiff line numberDiff line change
@@ -4529,7 +4529,13 @@ Several caveats.
45294529
- Non supported types include ``Period`` and actual python object types. These will raise a helpful error message
45304530
on an attempt at serialization.
45314531

4532-
See the documentation for `pyarrow <https://pyarrow.readthedocs.io/en/latest/` and `fastparquet <https://fastparquet.readthedocs.io/en/latest/necessary>`
4532+
See the documentation for `pyarrow <https://pyarrow.readthedocs.io/en/latest/>`__ and `fastparquet <https://fastparquet.readthedocs.io/en/latest/>`__
4533+
4534+
.. note::
4535+
4536+
These engines are very similar and should read/write nearly identical parquet format files.
4537+
These libraries differ in their underlying dependencies (``fastparquet`` uses ``numba``, while ``pyarrow`` uses a C library).
4538+
TODO: differing options to write non-standard columns & null treatment
45334539

45344540
.. ipython:: python
45354541

doc/source/options.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,8 @@ io.hdf.default_format None default format writing format,
416416
'table'
417417
io.hdf.dropna_table True drop ALL nan rows when appending
418418
to a table
419+
io.parquet.engine pyarrow The engine to use as a default for
420+
parquet reading and writing.
419421
mode.chained_assignment warn Raise an exception, warn, or no
420422
action if trying to use chained
421423
assignment, The default is warn
@@ -538,4 +540,4 @@ Only ``'display.max_rows'`` are serialized and published.
538540
.. ipython:: python
539541
:suppress:
540542
541-
pd.reset_option('display.html.table_schema')
543+
pd.reset_option('display.html.table_schema')

pandas/core/config_init.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -471,7 +471,7 @@ def _register_xlsx(engine, other):
471471
parquet_engine_doc = """
472472
: string
473473
The default parquet reader/writer engine. Available options:
474-
None, 'pyarrow', 'fastparquet'
474+
'pyarrow', 'fastparquet', the default is 'pyarrow'
475475
"""
476476

477477
with cf.config_prefix('io.parquet'):

pandas/core/frame.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1531,9 +1531,9 @@ def to_parquet(self, fname, engine=None, compression=None,
15311531
----------
15321532
fname : str
15331533
string file path
1534-
engine : parquet engine
1535-
supported are {'pyarrow', 'fastparquet'}
1536-
if None, will use the option: io.parquet.engine
1534+
engine : str, optional
1535+
The parquet engine, one of {'pyarrow', 'fastparquet'}
1536+
if None, will use the option: `io.parquet.engine`
15371537
compression : str, optional
15381538
compression method, includes {'gzip', 'snappy', 'brotli'}
15391539
kwargs passed to the engine

pandas/io/parquet.py

+14-13
Original file line numberDiff line numberDiff line change
@@ -81,16 +81,16 @@ def read(self, path):
8181

8282
def to_parquet(df, path, engine=None, compression=None, **kwargs):
8383
"""
84-
Write a DataFrame to the pyarrow
84+
Write a DataFrame to the parquet format.
8585
8686
Parameters
8787
----------
8888
df : DataFrame
8989
path : string
9090
File path
91-
engine : parquet engine
92-
supported are {'pyarrow', 'fastparquet'}
93-
if None, will use the option: io.parquet.engine
91+
engine : str, optional
92+
The parquet engine, one of {'pyarrow', 'fastparquet'}
93+
if None, will use the option: `io.parquet.engine`
9494
compression : str, optional
9595
compression method, includes {'gzip', 'snappy', 'brotli'}
9696
kwargs are passed to the engine
@@ -110,15 +110,16 @@ def to_parquet(df, path, engine=None, compression=None, **kwargs):
110110
# raise on anything else as we don't serialize the index
111111

112112
if not isinstance(df.index, Int64Index):
113-
raise ValueError("parquet does not serializing {} "
113+
raise ValueError("parquet does not support serializing {} "
114114
"for the index; you can .reset_index()"
115115
"to make the index into column(s)".format(
116116
type(df.index)))
117117

118118
if not df.index.equals(RangeIndex.from_range(range(len(df)))):
119-
raise ValueError("parquet does not serializing a non-default index "
120-
"for the index; you can .reset_index()"
121-
"to make the index into column(s)")
119+
raise ValueError("parquet does not support serializing a "
120+
"non-default index for the index; you "
121+
"can .reset_index() to make the index "
122+
"into column(s)")
122123

123124
if df.index.name is not None:
124125
raise ValueError("parquet does not serialize index meta-data on a "
@@ -136,22 +137,22 @@ def to_parquet(df, path, engine=None, compression=None, **kwargs):
136137

137138
def read_parquet(path, engine=None, **kwargs):
138139
"""
139-
Load a parquet object from the file path
140+
Load a parquet object from the file path, returning a DataFrame.
140141
141142
.. versionadded:: 0.20.0
142143
143144
Parameters
144145
----------
145146
path : string
146147
File path
147-
engine : parquet engine
148-
supported are {'pyarrow', 'fastparquet'}
149-
if None, will use the option: io.parquet.engine
148+
engine : str, optional
149+
The parquet engine, one of {'pyarrow', 'fastparquet'}
150+
if None, will use the option: `io.parquet.engine`
150151
kwargs are passed to the engine
151152
152153
Returns
153154
-------
154-
type of object stored in file
155+
DataFrame
155156
156157
"""
157158

0 commit comments

Comments
 (0)