Commit 71e1e5d

add configuration options & cross-engine tests

1 parent 99d3556
File tree: 4 files changed (+71, -6 lines)


pandas/core/config_init.py (+11)

@@ -466,3 +466,14 @@ def _register_xlsx(engine, other):
 except ImportError:
     # fallback
     _register_xlsx('openpyxl', 'xlsxwriter')
+
+# Set up the io.parquet specific configuration.
+parquet_engine_doc = """
+: string
+    The default parquet reader/writer engine. Available options:
+    None, 'pyarrow', 'fastparquet'
+"""
+
+with cf.config_prefix('io.parquet'):
+    cf.register_option('engine', 'pyarrow', parquet_engine_doc,
+                       validator=is_one_of_factory(['pyarrow', 'fastparquet']))
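As a usage note, here is a minimal sketch of how the option registered above behaves from user code (assuming this commit is applied and at least one parquet engine is installed); the option name and allowed values come straight from the registration:

import pandas as pd

# The default registered above is 'pyarrow'.
print(pd.get_option('io.parquet.engine'))

# Change it globally; the validator rejects anything other than
# 'pyarrow' or 'fastparquet'.
pd.set_option('io.parquet.engine', 'fastparquet')

# Or scope the change to a block, as the new tests do.
with pd.option_context('io.parquet.engine', 'pyarrow'):
    pass  # parquet I/O inside this block would default to pyarrow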

pandas/core/frame.py (+2, -1)

@@ -1520,7 +1520,7 @@ def to_feather(self, fname):
         from pandas.io.feather_format import to_feather
         to_feather(self, fname)
 
-    def to_parquet(self, fname, engine, compression=None,
+    def to_parquet(self, fname, engine=None, compression=None,
                    **kwargs):
         """
         write out the binary parquet for DataFrames
@@ -1533,6 +1533,7 @@ def to_parquet(self, fname, engine, compression=None,
             string file path
         engine : parquet engine
             supported are {'pyarrow', 'fastparquet'}
+            if None, will use the option: io.parquet.engine
         compression : str, optional
             compression method, includes {'gzip', 'snappy', 'brotli'}
         kwargs passed to the engine
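A short sketch of what the relaxed signature allows (the file name is hypothetical; a parquet engine must be installed):

import pandas as pd

df = pd.DataFrame({'A': [1, 2, 3], 'B': 'foo'})

# engine is now optional; with engine=None it is resolved from the
# io.parquet.engine option registered in config_init.py.
df.to_parquet('example.parquet')

# Passing engine explicitly still overrides the option.
df.to_parquet('example.parquet', engine='pyarrow', compression='snappy')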

pandas/io/parquet.py (+8, -3)

@@ -1,13 +1,16 @@
 """ parquet compat """
 
 from warnings import catch_warnings
-from pandas import DataFrame, RangeIndex, Int64Index
+from pandas import DataFrame, RangeIndex, Int64Index, get_option
 from pandas.compat import range
 
 
 def get_engine(engine):
     """ return our implementation """
 
+    if engine is None:
+        engine = get_option('io.parquet.engine')
+
     if engine not in ['pyarrow', 'fastparquet']:
         raise ValueError("engine must be one of 'pyarrow', 'fastparquet'")
 
@@ -71,7 +74,7 @@ def read(self, path):
         return self.api.ParquetFile(path).to_pandas()
 
 
-def to_parquet(df, path, engine, compression=None, **kwargs):
+def to_parquet(df, path, engine=None, compression=None, **kwargs):
     """
     Write a DataFrame to the pyarrow
 
@@ -82,6 +85,7 @@ def to_parquet(df, path, engine, compression=None, **kwargs):
         File path
     engine : parquet engine
         supported are {'pyarrow', 'fastparquet'}
+        if None, will use the option: io.parquet.engine
     compression : str, optional
        compression method, includes {'gzip', 'snappy', 'brotli'}
     kwargs are passed to the engine
@@ -125,7 +129,7 @@ def to_parquet(df, path, engine, compression=None, **kwargs):
     return impl.write(df, path, compression=compression)
 
 
-def read_parquet(path, engine, **kwargs):
+def read_parquet(path, engine=None, **kwargs):
     """
     Load a parquet object from the file path
 
@@ -137,6 +141,7 @@ def read_parquet(path, engine, **kwargs):
         File path
     engine : parquet engine
        supported are {'pyarrow', 'fastparquet'}
+        if None, will use the option: io.parquet.engine
     kwargs are passed to the engine
 
     Returns
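For clarity, a condensed, standalone sketch of the resolution order get_engine now follows; resolve_engine here is a hypothetical stand-in that returns the engine name rather than the implementation object:

import pandas as pd

def resolve_engine(engine=None):
    # Mirrors the new control flow: fall back to the option first,
    # then validate against the supported engine names.
    if engine is None:
        engine = pd.get_option('io.parquet.engine')
    if engine not in ['pyarrow', 'fastparquet']:
        raise ValueError("engine must be one of 'pyarrow', 'fastparquet'")
    return engine

resolve_engine()               # whatever io.parquet.engine is set to
resolve_engine('fastparquet')  # an explicit argument wins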

pandas/tests/io/test_parquet.py (+50, -2)

@@ -44,10 +44,58 @@ def fp():
     return 'fastparquet'
 
 
-def test_invalid_engine():
+@pytest.fixture
+def df_compat():
+    return pd.DataFrame({'A': [1, 2, 3], 'B': 'foo'})
+
+
+def test_invalid_engine(df_compat):
 
     with pytest.raises(ValueError):
-        tm.makeDataFrame().to_parquet('foo', 'bar')
+        df_compat.to_parquet('foo', 'bar')
+
+
+def test_options_py(df_compat, pa):
+    # use the set option
+
+    df = df_compat
+    with tm.ensure_clean() as path:
+
+        with pd.option_context('io.parquet.engine', 'pyarrow'):
+            df.to_parquet(path)
+
+            result = read_parquet(path)
+            tm.assert_frame_equal(result, df)
+
+
+def test_options_fp(df_compat, fp):
+    # use the set option
+
+    df = df_compat
+    with tm.ensure_clean() as path:
+
+        with pd.option_context('io.parquet.engine', 'fastparquet'):
+            df.to_parquet(path)
+
+            result = read_parquet(path)
+            tm.assert_frame_equal(result, df)
+
+
+def test_cross_engine(df_compat, pa, fp):
+    # cross-compat with differing reading/writing engines
+
+    df = df_compat
+    with tm.ensure_clean() as path:
+        df.to_parquet(path, engine=pa)
+
+        result = read_parquet(path, engine=fp)
+        tm.assert_frame_equal(result, df)
+
+    with tm.ensure_clean() as path:
+        df.to_parquet(path, engine=fp)
+
+        result = read_parquet(path, engine=pa)
+        tm.assert_frame_equal(result, df)
 
 
 class Base(object):
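Outside the test suite, the guarantee test_cross_engine exercises looks roughly like this in user code (the file name is hypothetical and both engines must be installed):

import pandas as pd
from pandas.io.parquet import to_parquet, read_parquet

df = pd.DataFrame({'A': [1, 2, 3], 'B': 'foo'})

# Write with one engine, read back with the other; the round trip should
# preserve the frame, which is what the new test asserts.
to_parquet(df, 'compat.parquet', engine='pyarrow')
roundtrip = read_parquet('compat.parquet', engine='fastparquet')
assert roundtrip.equals(df)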
