Skip to content

Commit dd27604

Browse files
committed
Fix condition, add whatsnew
1 parent 16a3da2 commit dd27604

File tree

3 files changed

+18
-9
lines changed

3 files changed

+18
-9
lines changed

doc/source/whatsnew/v2.1.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ Performance improvements
103103
- Performance improvement in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` for extension array dtypes (:issue:`51549`)
104104
- Performance improvement in :meth:`DataFrame.clip` and :meth:`Series.clip` (:issue:`51472`)
105105
- Performance improvement in :func:`read_parquet` on string columns when using ``use_nullable_dtypes=True`` (:issue:`47345`)
106-
-
106+
- Performance improvement in :func:`read_parquet` and :meth:`DataFrame.to_parquet` when reading a remote file with ``engine="pyarrow"`` (:issue:`51609`)
107107

108108
.. ---------------------------------------------------------------------------
109109
.. _whatsnew_210.bug_fixes:

pandas/io/parquet.py

+16-7
Original file line numberDiff line numberDiff line change
@@ -86,15 +86,18 @@ def _get_path_or_handle(
8686
"""File handling for PyArrow."""
8787
path_or_handle = stringify_path(path)
8888
if is_fsspec_url(path_or_handle) and fs is None:
89-
pa = import_optional_dependency("pyarrow")
90-
pa_fs = import_optional_dependency("pyarrow.fs")
9189
fsspec = import_optional_dependency("fsspec")
90+
if storage_options is None:
91+
pa = import_optional_dependency("pyarrow")
92+
pa_fs = import_optional_dependency("pyarrow.fs")
9293

93-
try:
94-
fs_arrow = import_optional_dependency("fsspec.implementations.arrow")
95-
fs, path_or_handle = pa_fs.FileSystem.from_uri(path)
96-
fs = fs_arrow.ArrowFSWrapper(fs)
97-
except (TypeError, pa.ArrowInvalid):
94+
try:
95+
fs_arrow = import_optional_dependency("fsspec.implementations.arrow")
96+
fs, path_or_handle = pa_fs.FileSystem.from_uri(path)
97+
fs = fs_arrow.ArrowFSWrapper(fs)
98+
except (TypeError, pa.ArrowInvalid):
99+
pass
100+
if fs is None:
98101
fs, path_or_handle = fsspec.core.url_to_fs(
99102
path_or_handle, **(storage_options or {})
100103
)
@@ -426,6 +429,12 @@ def to_parquet(
426429
``io.parquet.engine`` is used. The default ``io.parquet.engine``
427430
behavior is to try 'pyarrow', falling back to 'fastparquet' if
428431
'pyarrow' is unavailable.
432+
433+
When using the ``'pyarrow'`` engine and no storage options are provided
434+
and a filesystem is implemented by both ``pyarrow.fs`` and ``fsspec``
435+
(e.g. "s3://"), then the ``pyarrow.fs`` filesystem is attempted first.
436+
Use the ``filesystem`` keyword with an instantiated fsspec filesystem
437+
if you wish to use its implementation.
429438
compression : {{'snappy', 'gzip', 'brotli', 'lz4', 'zstd', None}},
430439
default 'snappy'. Name of the compression to use. Use ``None``
431440
for no compression. The supported compression methods actually

pandas/tests/io/test_gcs.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121

2222
@pytest.fixture
23-
def gcs_buffer(monkeypatch):
23+
def gcs_buffer():
2424
"""Emulate GCS using a binary buffer."""
2525
import fsspec
2626

0 commit comments

Comments
 (0)