Commit d6fe194

TST: placement of network error catching in s3 tests (#19645)
1 parent: 569bc7a

14 files changed (+114, -59 lines)

pandas/io/common.py (+9, -4)

@@ -183,7 +183,10 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
 
     Returns
     -------
-    a filepath_ or buffer or S3File instance, the encoding, the compression
+    tuple of ({a filepath_ or buffer or S3File instance},
+              encoding, str,
+              compression, str,
+              should_close, bool)
     """
     filepath_or_buffer = _stringify_path(filepath_or_buffer)
 
@@ -194,7 +197,8 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
         # Override compression based on Content-Encoding header
         compression = 'gzip'
         reader = BytesIO(req.read())
-        return reader, encoding, compression
+        req.close()
+        return reader, encoding, compression, True
 
     if is_s3_url(filepath_or_buffer):
         from pandas.io import s3
@@ -206,13 +210,13 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
     if isinstance(filepath_or_buffer, (compat.string_types,
                                        compat.binary_type,
                                        mmap.mmap)):
-        return _expand_user(filepath_or_buffer), None, compression
+        return _expand_user(filepath_or_buffer), None, compression, False
 
     if not is_file_like(filepath_or_buffer):
         msg = "Invalid file path or buffer object type: {_type}"
         raise ValueError(msg.format(_type=type(filepath_or_buffer)))
 
-    return filepath_or_buffer, None, compression
+    return filepath_or_buffer, None, compression, False
 
 
 def file_path_to_url(path):
@@ -309,6 +313,7 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
     is_text : boolean, default True
         whether file/buffer is in text format (csv, json, etc.), or in binary
         mode (pickle, etc.)
+
     Returns
     -------
     f : file-like
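
The new fourth element of the return value tells the caller whether it now owns the handle and must close it. A minimal sketch of the consuming pattern (read_raw is a hypothetical caller, not part of this commit):

    from pandas.io.common import get_filepath_or_buffer

    def read_raw(path_or_url):
        # should_close comes back True only when get_filepath_or_buffer
        # opened the handle itself (URLs, s3:// paths); local paths come
        # back as plain strings.
        buf, _, _, should_close = get_filepath_or_buffer(path_or_url)
        if isinstance(buf, str):
            with open(buf, 'rb') as f:
                return f.read()
        try:
            return buf.read()
        finally:
            if should_close:
                try:
                    buf.close()
                except Exception:
                    pass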

pandas/io/excel.py (+1, -1)

@@ -381,7 +381,7 @@ def __init__(self, io, **kwds):
         if _is_url(self._io):
             io = _urlopen(self._io)
         elif not isinstance(self.io, (ExcelFile, xlrd.Book)):
-            io, _, _ = get_filepath_or_buffer(self._io)
+            io, _, _, _ = get_filepath_or_buffer(self._io)
 
         if engine == 'xlrd' and isinstance(io, xlrd.Book):
             self.book = io

pandas/io/json/json.py (+8, -2)

@@ -404,7 +404,7 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
     """
 
     compression = _infer_compression(path_or_buf, compression)
-    filepath_or_buffer, _, compression = get_filepath_or_buffer(
+    filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
         path_or_buf, encoding=encoding, compression=compression,
     )
 
@@ -419,7 +419,13 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
     if chunksize:
         return json_reader
 
-    return json_reader.read()
+    result = json_reader.read()
+    if should_close:
+        try:
+            filepath_or_buffer.close()
+        except:  # noqa: flake8
+            pass
+    return result
 
 
 class JsonReader(BaseIterator):
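
The same close-if-owned block recurs in read_json above and in read_msgpack, the parquet readers, and the CSV reader below. A hypothetical helper capturing the shared pattern (not part of this commit) would be:

    def _maybe_close(handle, should_close):
        # Close only handles that get_filepath_or_buffer opened for us,
        # swallowing close() errors the same way the inline blocks do.
        if should_close:
            try:
                handle.close()
            except Exception:
                pass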

pandas/io/packers.py (+7, -1)

@@ -180,14 +180,20 @@ def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs):
     obj : type of object stored in file
 
     """
-    path_or_buf, _, _ = get_filepath_or_buffer(path_or_buf)
+    path_or_buf, _, _, should_close = get_filepath_or_buffer(path_or_buf)
     if iterator:
         return Iterator(path_or_buf)
 
     def read(fh):
         l = list(unpack(fh, encoding=encoding, **kwargs))
         if len(l) == 1:
             return l[0]
+
+        if should_close:
+            try:
+                path_or_buf.close()
+            except:  # noqa: flake8
+                pass
         return l
 
     # see if we have an actual file

pandas/io/parquet.py (+19, -11)

@@ -107,7 +107,7 @@ def write(self, df, path, compression='snappy',
         self.validate_dataframe(df)
         if self._pyarrow_lt_070:
             self._validate_write_lt_070(df)
-        path, _, _ = get_filepath_or_buffer(path, mode='wb')
+        path, _, _, _ = get_filepath_or_buffer(path, mode='wb')
 
         if self._pyarrow_lt_060:
             table = self.api.Table.from_pandas(df, timestamps_to_ms=True)
@@ -121,13 +121,21 @@
                 coerce_timestamps=coerce_timestamps, **kwargs)
 
     def read(self, path, columns=None, **kwargs):
-        path, _, _ = get_filepath_or_buffer(path)
+        path, _, _, should_close = get_filepath_or_buffer(path)
         if self._pyarrow_lt_070:
-            return self.api.parquet.read_pandas(path, columns=columns,
-                                                **kwargs).to_pandas()
-        kwargs['use_pandas_metadata'] = True
-        return self.api.parquet.read_table(path, columns=columns,
-                                           **kwargs).to_pandas()
+            result = self.api.parquet.read_pandas(path, columns=columns,
+                                                  **kwargs).to_pandas()
+        else:
+            kwargs['use_pandas_metadata'] = True
+            result = self.api.parquet.read_table(path, columns=columns,
+                                                 **kwargs).to_pandas()
+        if should_close:
+            try:
+                path.close()
+            except:  # noqa: flake8
+                pass
+
+        return result
 
     def _validate_write_lt_070(self, df):
         # Compatibility shim for pyarrow < 0.7.0
@@ -199,11 +207,11 @@ def write(self, df, path, compression='snappy', **kwargs):
             # path is s3:// so we need to open the s3file in 'wb' mode.
             # TODO: Support 'ab'
 
-            path, _, _ = get_filepath_or_buffer(path, mode='wb')
+            path, _, _, _ = get_filepath_or_buffer(path, mode='wb')
             # And pass the opened s3file to the fastparquet internal impl.
             kwargs['open_with'] = lambda path, _: path
         else:
-            path, _, _ = get_filepath_or_buffer(path)
+            path, _, _, _ = get_filepath_or_buffer(path)
 
         with catch_warnings(record=True):
             self.api.write(path, df,
@@ -214,13 +222,13 @@ def read(self, path, columns=None, **kwargs):
             # When path is s3:// an S3File is returned.
            # We need to retain the original path(str) while also
             # pass the S3File().open function to fsatparquet impl.
-            s3, _, _ = get_filepath_or_buffer(path)
+            s3, _, _, should_close = get_filepath_or_buffer(path)
             try:
                 parquet_file = self.api.ParquetFile(path, open_with=s3.s3.open)
             finally:
                 s3.close()
         else:
-            path, _, _ = get_filepath_or_buffer(path)
+            path, _, _, _ = get_filepath_or_buffer(path)
             parquet_file = self.api.ParquetFile(path)
 
         return parquet_file.to_pandas(columns=columns, **kwargs)
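
The pyarrow read path previously returned from inside the version check, so no cleanup could follow; folding both branches into an if/else gives a single exit point where the owned handle can be closed. Abstractly (a generic sketch, not pandas code):

    def read_with_cleanup(handle, use_old_api, read_old, read_new, should_close):
        # Both branches assign instead of returning early...
        if use_old_api:
            result = read_old(handle)
        else:
            result = read_new(handle)
        # ...so this owned-handle cleanup runs on every successful path.
        if should_close:
            try:
                handle.close()
            except Exception:
                pass
        return result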

pandas/io/parsers.py (+8, -1)

@@ -413,7 +413,7 @@ def _read(filepath_or_buffer, kwds):
 
     compression = kwds.get('compression')
     compression = _infer_compression(filepath_or_buffer, compression)
-    filepath_or_buffer, _, compression = get_filepath_or_buffer(
+    filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
         filepath_or_buffer, encoding, compression)
     kwds['compression'] = compression
 
@@ -439,6 +439,13 @@
         data = parser.read(nrows)
     finally:
         parser.close()
+
+    if should_close:
+        try:
+            filepath_or_buffer.close()
+        except:  # noqa: flake8
+            pass
+
     return data

pandas/io/s3.py (+2, -2)

@@ -27,7 +27,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
     fs = s3fs.S3FileSystem(anon=False)
     try:
         filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode)
-    except (OSError, NoCredentialsError):
+    except (compat.FileNotFoundError, NoCredentialsError):
         # boto3 has troubles when trying to access a public file
         # when credentialed...
         # An OSError is raised if you have credentials, but they
@@ -36,4 +36,4 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
         # for that bucket.
         fs = s3fs.S3FileSystem(anon=True)
         filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode)
-    return filepath_or_buffer, None, compression
+    return filepath_or_buffer, None, compression, True
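
Narrowing the catch from OSError to compat.FileNotFoundError means a genuine permission or connection error now propagates instead of silently triggering an anonymous retry. Condensed, the fallback amounts to the sketch below; it assumes Python 3 (builtin FileNotFoundError), whereas the commit goes through pandas.compat for Python 2 support:

    import s3fs
    from botocore.exceptions import NoCredentialsError

    def open_s3_key(key, mode='rb'):
        # Try credentialed access first; a missing key or missing
        # credentials triggers one anonymous retry, which is what public
        # buckets need when local credentials do not cover the bucket.
        try:
            return s3fs.S3FileSystem(anon=False).open(key, mode)
        except (FileNotFoundError, NoCredentialsError):
            return s3fs.S3FileSystem(anon=True).open(key, mode)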

pandas/io/sas/sas7bdat.py (+1, -1)

@@ -90,7 +90,7 @@ def __init__(self, path_or_buf, index=None, convert_dates=True,
         self._current_row_on_page_index = 0
         self._current_row_in_file_index = 0
 
-        self._path_or_buf, _, _ = get_filepath_or_buffer(path_or_buf)
+        self._path_or_buf, _, _, _ = get_filepath_or_buffer(path_or_buf)
         if isinstance(self._path_or_buf, compat.string_types):
             self._path_or_buf = open(self._path_or_buf, 'rb')
             self.handle = self._path_or_buf

pandas/io/sas/sas_xport.py (+2, -1)

@@ -236,7 +236,8 @@ def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1',
         self._chunksize = chunksize
 
         if isinstance(filepath_or_buffer, str):
-            filepath_or_buffer, encoding, compression = get_filepath_or_buffer(
+            (filepath_or_buffer, encoding,
+             compression, should_close) = get_filepath_or_buffer(
                 filepath_or_buffer, encoding=encoding)
 
         if isinstance(filepath_or_buffer, (str, compat.text_type, bytes)):

pandas/io/stata.py (+1, -1)

@@ -988,7 +988,7 @@ def __init__(self, path_or_buf, convert_dates=True,
         self._native_byteorder = _set_endianness(sys.byteorder)
         path_or_buf = _stringify_path(path_or_buf)
         if isinstance(path_or_buf, str):
-            path_or_buf, encoding, _ = get_filepath_or_buffer(
+            path_or_buf, encoding, _, should_close = get_filepath_or_buffer(
                 path_or_buf, encoding=self._default_encoding
             )

pandas/tests/io/conftest.py (+31, -22)

@@ -2,30 +2,34 @@
 
 import pytest
 from pandas.io.parsers import read_table
+from pandas.util import testing as tm
 
-HERE = os.path.dirname(__file__)
 
+@pytest.fixture
+def parser_data(request):
+    return os.path.join(tm.get_data_path(), '..', 'parser', 'data')
 
-@pytest.fixture(scope='module')
-def tips_file():
+
+@pytest.fixture
+def tips_file(parser_data):
     """Path to the tips dataset"""
-    return os.path.join(HERE, 'parser', 'data', 'tips.csv')
+    return os.path.join(parser_data, 'tips.csv')
 
 
-@pytest.fixture(scope='module')
-def jsonl_file():
+@pytest.fixture
+def jsonl_file(parser_data):
     """Path a JSONL dataset"""
-    return os.path.join(HERE, 'parser', 'data', 'items.jsonl')
+    return os.path.join(parser_data, 'items.jsonl')
 
 
-@pytest.fixture(scope='module')
-def salaries_table():
+@pytest.fixture
+def salaries_table(parser_data):
     """DataFrame with the salaries dataset"""
-    path = os.path.join(HERE, 'parser', 'data', 'salaries.csv')
+    path = os.path.join(parser_data, 'salaries.csv')
     return read_table(path)
 
 
-@pytest.fixture(scope='module')
+@pytest.fixture
 def s3_resource(tips_file, jsonl_file):
     """Fixture for mocking S3 interaction.
 
@@ -41,8 +45,8 @@ def s3_resource(tips_file, jsonl_file):
     is yielded by the fixture.
     """
     pytest.importorskip('s3fs')
+    boto3 = pytest.importorskip('boto3')
     moto = pytest.importorskip('moto')
-    moto.mock_s3().start()
 
     test_s3_files = [
         ('tips.csv', tips_file),
@@ -58,17 +62,22 @@ def s3_resource(tips_file, jsonl_file):
                            Key=s3_key,
                            Body=f)
 
-    boto3 = pytest.importorskip('boto3')
-    # see gh-16135
-    bucket = 'pandas-test'
+    try:
 
-    conn = boto3.resource("s3", region_name="us-east-1")
-    conn.create_bucket(Bucket=bucket)
-    add_tips_files(bucket)
+        s3 = moto.mock_s3()
+        s3.start()
 
-    conn.create_bucket(Bucket='cant_get_it', ACL='private')
-    add_tips_files('cant_get_it')
+        # see gh-16135
+        bucket = 'pandas-test'
+        conn = boto3.resource("s3", region_name="us-east-1")
 
-    yield conn
+        conn.create_bucket(Bucket=bucket)
+        add_tips_files(bucket)
 
-    moto.mock_s3().stop()
+        conn.create_bucket(Bucket='cant_get_it', ACL='private')
+        add_tips_files('cant_get_it')
+        yield conn
+    except:  # noqa: flake8
+        pytest.skip("failure to use s3 resource")
+    finally:
+        s3.stop()
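
With the try/except moved inside the fixture, any failure to stand up the mock (network-dependent setup included, per the commit title) skips the dependent tests instead of erroring them. A minimal sketch of a test consuming the fixture (hypothetical test name; bucket and file names follow the fixture above):

    import pandas as pd

    def test_tips_from_mock_s3(s3_resource):
        # s3_resource stands up moto's S3 mock and uploads tips.csv to the
        # 'pandas-test' bucket, or skips the test if any of that fails.
        df = pd.read_csv('s3://pandas-test/tips.csv')
        assert not df.empty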

pandas/tests/io/json/test_pandas.py (-1)

@@ -1039,7 +1039,6 @@ def test_read_inline_jsonl(self):
         assert_frame_equal(result, expected)
 
     def test_read_s3_jsonl(self, s3_resource):
-        pytest.importorskip('s3fs')
         # GH17200
 
         result = read_json('s3n://pandas-test/items.jsonl', lines=True)
