Skip to content

Commit 35821a5

Browse files
CJStadler authored and TomAugspurger committed
Avoid calling S3File.s3 (#27777)
* Avoid calling S3File.s3 when reading from S3 using fastparquet. This attribute was removed in s3fs 0.3.0. This change avoids accessing it by using a new function, get_file_and_filesystem, which returns the filesystem in addition to the file.
1 parent 6afa2ad commit 35821a5

File tree

3 files changed

+24
-9
lines changed

3 files changed

+24
-9
lines changed

doc/source/whatsnew/v0.25.1.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ MultiIndex
104104
I/O
105105
^^^
106106

107-
-
107+
- Avoid calling ``S3File.s3`` when reading parquet, as this was removed in s3fs version 0.3.0 (:issue:`27756`)
108108
-
109109
-
110110

pandas/io/parquet.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -184,12 +184,14 @@ def write(
184184

185185
def read(self, path, columns=None, **kwargs):
186186
if is_s3_url(path):
187+
from pandas.io.s3 import get_file_and_filesystem
188+
187189
# When path is s3:// an S3File is returned.
188190
# We need to retain the original path(str) while also
189191
# passing the S3File().open function to the fastparquet impl.
190-
s3, _, _, should_close = get_filepath_or_buffer(path)
192+
s3, filesystem = get_file_and_filesystem(path)
191193
try:
192-
parquet_file = self.api.ParquetFile(path, open_with=s3.s3.open)
194+
parquet_file = self.api.ParquetFile(path, open_with=filesystem.open)
193195
finally:
194196
s3.close()
195197
else:

pandas/io/s3.py

+19-6
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
""" s3 support for remote file interactivity """
2+
from typing import IO, Any, Optional, Tuple
23
from urllib.parse import urlparse as parse_url
34

45
from pandas.compat._optional import import_optional_dependency
56

7+
from pandas._typing import FilePathOrBuffer
8+
69
s3fs = import_optional_dependency(
710
"s3fs", extra="The s3fs package is required to handle s3 files."
811
)
@@ -14,17 +17,17 @@ def _strip_schema(url):
1417
return result.netloc + result.path
1518

1619

17-
def get_filepath_or_buffer(
18-
filepath_or_buffer, encoding=None, compression=None, mode=None
19-
):
20+
def get_file_and_filesystem(
21+
filepath_or_buffer: FilePathOrBuffer, mode: Optional[str] = None
22+
) -> Tuple[IO, Any]:
2023
from botocore.exceptions import NoCredentialsError
2124

2225
if mode is None:
2326
mode = "rb"
2427

2528
fs = s3fs.S3FileSystem(anon=False)
2629
try:
27-
filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode)
30+
file = fs.open(_strip_schema(filepath_or_buffer), mode)
2831
except (FileNotFoundError, NoCredentialsError):
2932
# boto3 has troubles when trying to access a public file
3033
# when credentialed...
@@ -33,5 +36,15 @@ def get_filepath_or_buffer(
3336
# A NoCredentialsError is raised if you don't have creds
3437
# for that bucket.
3538
fs = s3fs.S3FileSystem(anon=True)
36-
filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode)
37-
return filepath_or_buffer, None, compression, True
39+
file = fs.open(_strip_schema(filepath_or_buffer), mode)
40+
return file, fs
41+
42+
43+
def get_filepath_or_buffer(
44+
filepath_or_buffer: FilePathOrBuffer,
45+
encoding: Optional[str] = None,
46+
compression: Optional[str] = None,
47+
mode: Optional[str] = None,
48+
) -> Tuple[IO, Optional[str], Optional[str], bool]:
49+
file, _fs = get_file_and_filesystem(filepath_or_buffer, mode=mode)
50+
return file, None, compression, True

0 commit comments

Comments
 (0)