diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 943a6adb7944e..7d78a8fe6dd84 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -103,7 +103,7 @@ MultiIndex I/O ^^^ -- +- Avoid calling ``S3File.s3`` when reading parquet, as this was removed in s3fs version 0.3.0 (:issue:`27756`) - - diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 82c460300582b..6fc70e9f4a737 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -184,12 +184,14 @@ def write( def read(self, path, columns=None, **kwargs): if is_s3_url(path): + from pandas.io.s3 import get_file_and_filesystem + # When path is s3:// an S3File is returned. # We need to retain the original path(str) while also # pass the S3File().open function to fsatparquet impl. - s3, _, _, should_close = get_filepath_or_buffer(path) + s3, filesystem = get_file_and_filesystem(path) try: - parquet_file = self.api.ParquetFile(path, open_with=s3.s3.open) + parquet_file = self.api.ParquetFile(path, open_with=filesystem.open) finally: s3.close() else: diff --git a/pandas/io/s3.py b/pandas/io/s3.py index 0a7c082fec51c..7e0a37e8cba20 100644 --- a/pandas/io/s3.py +++ b/pandas/io/s3.py @@ -1,8 +1,11 @@ """ s3 support for remote file interactivity """ +from typing import IO, Any, Optional, Tuple from urllib.parse import urlparse as parse_url from pandas.compat._optional import import_optional_dependency +from pandas._typing import FilePathOrBuffer + s3fs = import_optional_dependency( "s3fs", extra="The s3fs package is required to handle s3 files." ) @@ -14,9 +17,9 @@ def _strip_schema(url): return result.netloc + result.path -def get_filepath_or_buffer( - filepath_or_buffer, encoding=None, compression=None, mode=None -): +def get_file_and_filesystem( + filepath_or_buffer: FilePathOrBuffer, mode: Optional[str] = None +) -> Tuple[IO, Any]: from botocore.exceptions import NoCredentialsError if mode is None: @@ -24,7 +27,7 @@ def get_filepath_or_buffer( fs = s3fs.S3FileSystem(anon=False) try: - filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode) + file = fs.open(_strip_schema(filepath_or_buffer), mode) except (FileNotFoundError, NoCredentialsError): # boto3 has troubles when trying to access a public file # when credentialed... @@ -33,5 +36,15 @@ def get_filepath_or_buffer( # A NoCredentialsError is raised if you don't have creds # for that bucket. fs = s3fs.S3FileSystem(anon=True) - filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode) - return filepath_or_buffer, None, compression, True + file = fs.open(_strip_schema(filepath_or_buffer), mode) + return file, fs + + +def get_filepath_or_buffer( + filepath_or_buffer: FilePathOrBuffer, + encoding: Optional[str] = None, + compression: Optional[str] = None, + mode: Optional[str] = None, +) -> Tuple[IO, Optional[str], Optional[str], bool]: + file, _fs = get_file_and_filesystem(filepath_or_buffer, mode=mode) + return file, None, compression, True