Skip to content

Commit 23aa1d1

Browse files
committed
TST: Made S3-related tests mock boto
Kept a couple of network tests around for things like accessing a private bucket, as that behavior is hard to mock.
1 parent 062f6f1 commit 23aa1d1

17 files changed

+202
-154
lines changed

appveyor.yml

+5
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@ install:
8080
- cmd: conda list -n pandas
8181
- cmd: echo "installing requirements from %REQ% - done"
8282

83+
# add some pip only reqs to the env
84+
- SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.pip
85+
- cmd: echo "installing requirements from %REQ%"
86+
- cmd: pip install -Ur %REQ%
87+
8388
# build em using the local source checkout in the correct windows env
8489
- cmd: '%CMD_IN_ENV% python setup.py build_ext --inplace'
8590

ci/requirements-2.7.run

+1
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,4 @@ patsy
1818
pymysql=0.6.3
1919
jinja2=2.8
2020
xarray=0.8.0
21+
moto

ci/requirements-2.7_SLOW.run

+1
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,4 @@ psycopg2
1717
pymysql
1818
html5lib
1919
beautiful-soup
20+
moto

ci/requirements-2.7_WIN.pip

Whitespace-only changes.

ci/requirements-2.7_WIN.run

+1
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ bottleneck
1616
html5lib
1717
beautiful-soup
1818
jinja2=2.8
19+
moto

ci/requirements-3.5.run

+1
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,4 @@ psycopg2
1818
s3fs
1919
beautifulsoup4
2020
ipython
21+
moto

ci/requirements-3.5_OSX.run

+1
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@ bottleneck
1414
xarray
1515
s3fs
1616
beautifulsoup4
17+
moto

ci/requirements-3.6.run

+1
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ beautifulsoup4
2323
s3fs
2424
xarray
2525
ipython
26+
moto

ci/requirements-3.6_LOCALE.run

+1
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,4 @@ beautifulsoup4
2020
s3fs
2121
xarray
2222
ipython
23+
moto

ci/requirements-3.6_LOCALE_SLOW.run

+1
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,4 @@ beautifulsoup4
2020
s3fs
2121
xarray
2222
ipython
23+
moto

ci/requirements-3.6_NUMPY_DEV.pip

Whitespace-only changes.

ci/requirements-3.6_WIN.pip

Whitespace-only changes.
1.29 KB
Binary file not shown.
1.7 KB
Binary file not shown.

pandas/tests/io/parser/test_network.py

+159-125
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,64 @@
44
Tests parsers ability to read and parse non-local files
55
and hence require a network connection to be read.
66
"""
7-
87
import os
8+
99
import pytest
10+
import six
1011

1112
import pandas.util.testing as tm
1213
from pandas import DataFrame
1314
from pandas.io.parsers import read_csv, read_table
1415

1516

17+
@pytest.fixture(scope='module')
18+
def tips_file():
19+
return os.path.join(tm.get_data_path(), 'tips.csv')
20+
21+
1622
@pytest.fixture(scope='module')
1723
def salaries_table():
1824
path = os.path.join(tm.get_data_path(), 'salaries.csv')
1925
return read_table(path)
2026

2127

28+
@pytest.fixture(scope='module')
29+
def test_s3_resource(request, tips_file):
30+
pytest.importorskip('s3fs')
31+
moto = pytest.importorskip('moto')
32+
moto.mock_s3().start()
33+
34+
test_s3_files = [
35+
('tips.csv', tips_file),
36+
('tips.csv.gz', tips_file + '.gz'),
37+
('tips.csv.bz2', tips_file + '.bz2'),
38+
]
39+
40+
def add_tips_files(bucket_name):
41+
for s3_key, file_name in test_s3_files:
42+
with open(file_name, 'rb') as f:
43+
conn.Bucket(bucket_name).put_object(
44+
Key=s3_key,
45+
Body=f)
46+
47+
boto3 = pytest.importorskip('boto3')
48+
# see gh-16135
49+
bucket = 'pandas-test'
50+
51+
conn = boto3.resource("s3", region_name="us-east-1")
52+
conn.create_bucket(Bucket=bucket)
53+
add_tips_files(bucket)
54+
55+
conn.create_bucket(Bucket='cant_get_it', ACL='private')
56+
add_tips_files('cant_get_it')
57+
58+
def teardown():
59+
moto.mock_s3().stop()
60+
request.addfinalizer(teardown)
61+
62+
return conn
63+
64+
2265
@pytest.mark.network
2366
@pytest.mark.parametrize(
2467
"compression,extension",
@@ -50,151 +93,142 @@ def check_compressed_urls(salaries_table, compression, extension, mode,
5093
tm.assert_frame_equal(url_table, salaries_table)
5194

5295

53-
class TestS3(object):
96+
@tm.network
97+
def test_parse_public_s3_bucket():
98+
pytest.importorskip('s3fs')
99+
# more of an integration test due to the not-public contents portion
100+
# can probably mock this though.
101+
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
102+
df = read_csv('s3://pandas-test/tips.csv' +
103+
ext, compression=comp)
104+
assert isinstance(df, DataFrame)
105+
assert not df.empty
106+
tm.assert_frame_equal(read_csv(
107+
tm.get_data_path('tips.csv')), df)
108+
109+
# Read public file from bucket with not-public contents
110+
df = read_csv('s3://cant_get_it/tips.csv')
111+
assert isinstance(df, DataFrame)
112+
assert not df.empty
113+
tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df)
114+
115+
116+
def test_parse_public_s3n_bucket(test_s3_resource):
117+
118+
# Read from AWS s3 as "s3n" URL
119+
df = read_csv('s3n://pandas-test/tips.csv', nrows=10)
120+
assert isinstance(df, DataFrame)
121+
assert not df.empty
122+
tm.assert_frame_equal(read_csv(
123+
tm.get_data_path('tips.csv')).iloc[:10], df)
54124

55-
def setup_method(self, method):
56-
try:
57-
import s3fs # noqa
58-
except ImportError:
59-
pytest.skip("s3fs not installed")
60125

61-
@tm.network
62-
def test_parse_public_s3_bucket(self):
63-
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
64-
df = read_csv('s3://pandas-test/tips.csv' +
65-
ext, compression=comp)
126+
def test_parse_public_s3a_bucket(test_s3_resource):
127+
# Read from AWS s3 as "s3a" URL
128+
df = read_csv('s3a://pandas-test/tips.csv', nrows=10)
129+
assert isinstance(df, DataFrame)
130+
assert not df.empty
131+
tm.assert_frame_equal(read_csv(
132+
tm.get_data_path('tips.csv')).iloc[:10], df)
133+
134+
135+
def test_parse_public_s3_bucket_nrows(test_s3_resource):
136+
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
137+
df = read_csv('s3://pandas-test/tips.csv' +
138+
ext, nrows=10, compression=comp)
139+
assert isinstance(df, DataFrame)
140+
assert not df.empty
141+
tm.assert_frame_equal(read_csv(
142+
tm.get_data_path('tips.csv')).iloc[:10], df)
143+
144+
145+
def test_parse_public_s3_bucket_chunked(test_s3_resource):
146+
# Read with a chunksize
147+
chunksize = 5
148+
local_tips = read_csv(tm.get_data_path('tips.csv'))
149+
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
150+
df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
151+
chunksize=chunksize, compression=comp)
152+
assert df_reader.chunksize == chunksize
153+
for i_chunk in [0, 1, 2]:
154+
# Read a couple of chunks and make sure we see them
155+
# properly.
156+
df = df_reader.get_chunk()
157+
assert isinstance(df, DataFrame)
158+
assert not df.empty
159+
true_df = local_tips.iloc[
160+
chunksize * i_chunk: chunksize * (i_chunk + 1)]
161+
tm.assert_frame_equal(true_df, df)
162+
163+
164+
def test_parse_public_s3_bucket_chunked_python(test_s3_resource):
165+
# Read with a chunksize using the Python parser
166+
chunksize = 5
167+
local_tips = read_csv(tm.get_data_path('tips.csv'))
168+
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
169+
df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
170+
chunksize=chunksize, compression=comp,
171+
engine='python')
172+
assert df_reader.chunksize == chunksize
173+
for i_chunk in [0, 1, 2]:
174+
# Read a couple of chunks and make sure we see them properly.
175+
df = df_reader.get_chunk()
66176
assert isinstance(df, DataFrame)
67177
assert not df.empty
68-
tm.assert_frame_equal(read_csv(
69-
tm.get_data_path('tips.csv')), df)
178+
true_df = local_tips.iloc[
179+
chunksize * i_chunk: chunksize * (i_chunk + 1)]
180+
tm.assert_frame_equal(true_df, df)
181+
70182

71-
# Read public file from bucket with not-public contents
72-
df = read_csv('s3://cant_get_it/tips.csv')
183+
def test_parse_public_s3_bucket_python(test_s3_resource):
184+
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
185+
df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
186+
compression=comp)
73187
assert isinstance(df, DataFrame)
74188
assert not df.empty
75-
tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df)
189+
tm.assert_frame_equal(read_csv(
190+
tm.get_data_path('tips.csv')), df)
76191

77-
@tm.network
78-
def test_parse_public_s3n_bucket(self):
79-
# Read from AWS s3 as "s3n" URL
80-
df = read_csv('s3n://pandas-test/tips.csv', nrows=10)
192+
193+
def test_infer_s3_compression(test_s3_resource):
194+
for ext in ['', '.gz', '.bz2']:
195+
df = read_csv('s3://pandas-test/tips.csv' + ext,
196+
engine='python', compression='infer')
81197
assert isinstance(df, DataFrame)
82198
assert not df.empty
83199
tm.assert_frame_equal(read_csv(
84-
tm.get_data_path('tips.csv')).iloc[:10], df)
200+
tm.get_data_path('tips.csv')), df)
201+
85202

86-
@tm.network
87-
def test_parse_public_s3a_bucket(self):
88-
# Read from AWS s3 as "s3a" URL
89-
df = read_csv('s3a://pandas-test/tips.csv', nrows=10)
203+
def test_parse_public_s3_bucket_nrows_python(test_s3_resource):
204+
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
205+
df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
206+
nrows=10, compression=comp)
90207
assert isinstance(df, DataFrame)
91208
assert not df.empty
92209
tm.assert_frame_equal(read_csv(
93210
tm.get_data_path('tips.csv')).iloc[:10], df)
94211

95-
@tm.network
96-
def test_parse_public_s3_bucket_nrows(self):
97-
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
98-
df = read_csv('s3://pandas-test/tips.csv' +
99-
ext, nrows=10, compression=comp)
100-
assert isinstance(df, DataFrame)
101-
assert not df.empty
102-
tm.assert_frame_equal(read_csv(
103-
tm.get_data_path('tips.csv')).iloc[:10], df)
104-
105-
@tm.network
106-
def test_parse_public_s3_bucket_chunked(self):
107-
# Read with a chunksize
108-
chunksize = 5
109-
local_tips = read_csv(tm.get_data_path('tips.csv'))
110-
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
111-
df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
112-
chunksize=chunksize, compression=comp)
113-
assert df_reader.chunksize == chunksize
114-
for i_chunk in [0, 1, 2]:
115-
# Read a couple of chunks and make sure we see them
116-
# properly.
117-
df = df_reader.get_chunk()
118-
assert isinstance(df, DataFrame)
119-
assert not df.empty
120-
true_df = local_tips.iloc[
121-
chunksize * i_chunk: chunksize * (i_chunk + 1)]
122-
tm.assert_frame_equal(true_df, df)
123-
124-
@tm.network
125-
def test_parse_public_s3_bucket_chunked_python(self):
126-
# Read with a chunksize using the Python parser
127-
chunksize = 5
128-
local_tips = read_csv(tm.get_data_path('tips.csv'))
129-
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
130-
df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
131-
chunksize=chunksize, compression=comp,
132-
engine='python')
133-
assert df_reader.chunksize == chunksize
134-
for i_chunk in [0, 1, 2]:
135-
# Read a couple of chunks and make sure we see them properly.
136-
df = df_reader.get_chunk()
137-
assert isinstance(df, DataFrame)
138-
assert not df.empty
139-
true_df = local_tips.iloc[
140-
chunksize * i_chunk: chunksize * (i_chunk + 1)]
141-
tm.assert_frame_equal(true_df, df)
142-
143-
@tm.network
144-
def test_parse_public_s3_bucket_python(self):
145-
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
146-
df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
147-
compression=comp)
148-
assert isinstance(df, DataFrame)
149-
assert not df.empty
150-
tm.assert_frame_equal(read_csv(
151-
tm.get_data_path('tips.csv')), df)
152-
153-
@tm.network
154-
def test_infer_s3_compression(self):
155-
for ext in ['', '.gz', '.bz2']:
156-
df = read_csv('s3://pandas-test/tips.csv' + ext,
157-
engine='python', compression='infer')
158-
assert isinstance(df, DataFrame)
159-
assert not df.empty
160-
tm.assert_frame_equal(read_csv(
161-
tm.get_data_path('tips.csv')), df)
162-
163-
@tm.network
164-
def test_parse_public_s3_bucket_nrows_python(self):
165-
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
166-
df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
167-
nrows=10, compression=comp)
168-
assert isinstance(df, DataFrame)
169-
assert not df.empty
170-
tm.assert_frame_equal(read_csv(
171-
tm.get_data_path('tips.csv')).iloc[:10], df)
172212

173-
@tm.network
174-
def test_s3_fails(self):
175-
with pytest.raises(IOError):
176-
read_csv('s3://nyqpug/asdf.csv')
213+
def test_s3_fails(test_s3_resource):
214+
with pytest.raises(IOError):
215+
read_csv('s3://nyqpug/asdf.csv')
177216

178-
# Receive a permission error when trying to read a private bucket.
179-
# It's irrelevant here that this isn't actually a table.
180-
with pytest.raises(IOError):
181-
read_csv('s3://cant_get_it/')
217+
# Receive a permission error when trying to read a private bucket.
218+
# It's irrelevant here that this isn't actually a table.
219+
with pytest.raises(IOError):
220+
read_csv('s3://cant_get_it/')
182221

183-
@tm.network
184-
def boto3_client_s3(self):
185-
# see gh-16135
186222

187-
# boto3 is a dependency of s3fs
188-
import boto3
189-
client = boto3.client("s3")
223+
def test_read_csv__handles_boto_s3_object(test_s3_resource, tips_file):
224+
# see gh-16135
190225

191-
key = "/tips.csv"
192-
bucket = "pandas-test"
193-
s3_object = client.get_object(Bucket=bucket, Key=key)
226+
s3_object = test_s3_resource.meta.client.get_object(Bucket='pandas-test',
227+
Key='tips.csv')
194228

195-
result = read_csv(s3_object["Body"])
196-
assert isinstance(result, DataFrame)
197-
assert not result.empty
229+
result = read_csv(six.BytesIO(s3_object["Body"].read()), encoding='utf8')
230+
assert isinstance(result, DataFrame)
231+
assert not result.empty
198232

199-
expected = read_csv(tm.get_data_path('tips.csv'))
200-
tm.assert_frame_equal(result, expected)
233+
expected = read_csv(tips_file)
234+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)