-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
IO: Fix S3 Error Handling #33645
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
IO: Fix S3 Error Handling #33645
Changes from 3 commits
b8e636b
c891f22
6c60dc1
b745e1d
31701e8
83ffcb6
25ca427
7bd1c9f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -92,7 +92,7 @@ def write( | |
**kwargs, | ||
): | ||
self.validate_dataframe(df) | ||
path, _, _, _ = get_filepath_or_buffer(path, mode="wb") | ||
path, _, _, should_close = get_filepath_or_buffer(path, mode="wb") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For some reason we only have this logic in read. Should add to write too There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm, can you open an issue for this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For which bit? File basically needs to be closed post write for s3fs to throw as expected. @TomAugspurger mention it here: #32486 (comment) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
your comment here. |
||
|
||
from_pandas_kwargs: Dict[str, Any] = {"schema": kwargs.pop("schema", None)} | ||
if index is not None: | ||
|
@@ -109,6 +109,8 @@ def write( | |
) | ||
else: | ||
self.api.parquet.write_table(table, path, compression=compression, **kwargs) | ||
if should_close: | ||
path.close() | ||
|
||
def read(self, path, columns=None, **kwargs): | ||
path, _, _, should_close = get_filepath_or_buffer(path) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -159,7 +159,7 @@ def test_parse_public_s3_bucket_nrows_python(self, tips_df): | |
assert not df.empty | ||
tm.assert_frame_equal(tips_df.iloc[:10], df) | ||
|
||
def test_s3_fails(self): | ||
def test_read_s3_fails(self): | ||
with pytest.raises(IOError): | ||
alimcmaster1 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
read_csv("s3://nyqpug/asdf.csv") | ||
|
||
|
@@ -168,6 +168,20 @@ def test_s3_fails(self): | |
with pytest.raises(IOError): | ||
read_csv("s3://cant_get_it/file.csv") | ||
|
||
def test_write_s3_csv_fails(self, tips_df): | ||
# GH 32486 | ||
with pytest.raises( | ||
FileNotFoundError, match="The specified bucket does not exist" | ||
): | ||
tips_df.to_csv("s3://an_s3_bucket_data_doesnt_exit/not_real.csv") | ||
|
||
def test_write_s3_parquet_fails(self, tips_df): | ||
# GH 27679 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this need an importorskip, or do we raise prior to importing the engine? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added an importskip. Would raise ImportError if PyArrow or fastparquet isn’t installed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add the decorator version instead There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. have done - and changed other importerskip in this file |
||
with pytest.raises( | ||
FileNotFoundError, match="The specified bucket does not exist" | ||
): | ||
tips_df.to_parquet("s3://an_s3_bucket_data_doesnt_exit/not_real.parquet") | ||
|
||
def test_read_csv_handles_boto_s3_object(self, s3_resource, tips_file): | ||
# see gh-16135 | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -56,7 +56,15 @@ def open(*args): | |
|
||
monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem) | ||
df1.to_csv("gs://test/test.csv", index=True) | ||
df2 = read_csv(StringIO(s.getvalue()), parse_dates=["dt"], index_col=0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This no longer works since file needs to be closed to validate credentials. #32486 (comment) |
||
|
||
def mock_get_filepath_or_buffer(*args, **kwargs): | ||
return StringIO(df1.to_csv()), None, None, False | ||
|
||
monkeypatch.setattr( | ||
"pandas.io.gcs.get_filepath_or_buffer", mock_get_filepath_or_buffer | ||
) | ||
|
||
df2 = read_csv("gs://test/test.csv", parse_dates=["dt"], index_col=0) | ||
|
||
tm.assert_frame_equal(df1, df2) | ||
|
||
|
@@ -86,28 +94,6 @@ def open(self, path, mode="r", *args): | |
) | ||
|
||
|
||
@td.skip_if_no("gcsfs") | ||
def test_gcs_get_filepath_or_buffer(monkeypatch): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed since |
||
df1 = DataFrame( | ||
{ | ||
"int": [1, 3], | ||
"float": [2.0, np.nan], | ||
"str": ["t", "s"], | ||
"dt": date_range("2018-06-18", periods=2), | ||
} | ||
) | ||
|
||
def mock_get_filepath_or_buffer(*args, **kwargs): | ||
return (StringIO(df1.to_csv(index=False)), None, None, False) | ||
|
||
monkeypatch.setattr( | ||
"pandas.io.gcs.get_filepath_or_buffer", mock_get_filepath_or_buffer | ||
) | ||
df2 = read_csv("gs://test/test.csv", parse_dates=["dt"]) | ||
|
||
tm.assert_frame_equal(df1, df2) | ||
|
||
|
||
@td.skip_if_installed("gcsfs") | ||
def test_gcs_not_present_exception(): | ||
with pytest.raises(ImportError) as e: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@TomAugspurger - you pointer here was the exact problem #32486 (comment)