
Commit 7f97c13

javadnoorb authored and TomAugspurger committed
REF: Mock all S3 Tests (pandas-dev#20409)
* REF: Mock all S3 Tests

Closes pandas-dev#19825
1 parent 689b3fe commit 7f97c13
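
The refactor removes the @tm.network decorator from the S3 parser tests and runs them against a mocked S3 endpoint instead, via the s3_resource fixture applied to the TestS3 class below. The fixture itself lives in a conftest that is not part of this diff; a minimal sketch of what a moto-backed version could look like (the fixture body, bucket name, and seeded keys are assumptions, not taken from this commit; tips_file is the existing fixture the tests already use):

import pytest

@pytest.fixture(scope="module")
def s3_resource(tips_file):
    # Skip cleanly if the optional dependencies are missing.
    boto3 = pytest.importorskip("boto3")
    moto = pytest.importorskip("moto")

    # moto intercepts boto3's HTTP calls, so no real AWS traffic occurs.
    with moto.mock_s3():
        conn = boto3.resource("s3", region_name="us-east-1")
        bucket = conn.create_bucket(Bucket="pandas-test")
        # Seed the fake bucket with the data the tests read back.
        with open(tips_file, "rb") as f:
            bucket.put_object(Key="tips.csv", Body=f.read())
        yield conn

A real fixture would also have to upload the compressed variants (tips.csv.gz, tips.csv.bz2) that several of the tests request.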

File tree

2 files changed: +24, -43 lines


asv_bench/benchmarks/io/csv.py (-32 lines)
@@ -118,38 +118,6 @@ def time_read_uint64_na_values(self):
                  na_values=self.na_values)
 
 
-class S3(object):
-    # Make sure that we can read part of a file from S3 without
-    # needing to download the entire thing. Use the timeit.default_timer
-    # to measure wall time instead of CPU time -- we want to see
-    # how long it takes to download the data.
-    timer = timeit.default_timer
-    params = ([None, "gzip", "bz2"], ["python", "c"])
-    param_names = ["compression", "engine"]
-
-    def setup(self, compression, engine):
-        if compression == "bz2" and engine == "c" and PY2:
-            # The Python 2 C parser can't read bz2 from open files.
-            raise NotImplementedError
-        try:
-            import s3fs  # noqa
-        except ImportError:
-            # Skip these benchmarks if `boto` is not installed.
-            raise NotImplementedError
-
-        ext = ""
-        if compression == "gzip":
-            ext = ".gz"
-        elif compression == "bz2":
-            ext = ".bz2"
-        self.big_fname = "s3://pandas-test/large_random.csv" + ext
-
-    def time_read_csv_10_rows(self, compression, engine):
-        # Read a small number of rows from a huge (100,000 x 50) table.
-        read_csv(self.big_fname, nrows=10, compression=compression,
-                 engine=engine)
-
-
 class ReadCSVThousands(BaseIO):
 
     goal_time = 0.2
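
The removed benchmark's comments explain why it overrode ASV's timer: timeit.default_timer measures wall time, while a CPU timer would miss the time spent waiting on the network. A quick standalone illustration of the difference (not part of the commit; time.sleep stands in for an S3 download):

import time
import timeit

start_wall = timeit.default_timer()  # wall clock
start_cpu = time.process_time()      # CPU time used by this process

time.sleep(1)  # simulates blocking on network I/O

print(timeit.default_timer() - start_wall)  # ~1.0 s: the wait is counted
print(time.process_time() - start_cpu)      # ~0.0 s: sleeping burns no CPU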

pandas/tests/io/parser/test_network.py (+24, -11 lines)
@@ -4,13 +4,16 @@
 Tests parsers ability to read and parse non-local files
 and hence require a network connection to be read.
 """
+import logging
+
 import pytest
+import numpy as np
 
 import pandas.util.testing as tm
 import pandas.util._test_decorators as td
 from pandas import DataFrame
 from pandas.io.parsers import read_csv, read_table
-from pandas.compat import BytesIO
+from pandas.compat import BytesIO, StringIO
 
 
 @pytest.mark.network
@@ -45,9 +48,9 @@ def check_compressed_urls(salaries_table, compression, extension, mode,
     tm.assert_frame_equal(url_table, salaries_table)
 
 
+@pytest.mark.usefixtures("s3_resource")
 class TestS3(object):
 
-    @tm.network
     def test_parse_public_s3_bucket(self):
         pytest.importorskip('s3fs')
         # more of an integration test due to the not-public contents portion
@@ -66,7 +69,6 @@ def test_parse_public_s3_bucket(self):
         assert not df.empty
         tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df)
 
-    @tm.network
     def test_parse_public_s3n_bucket(self):
 
         # Read from AWS s3 as "s3n" URL
@@ -76,7 +78,6 @@ def test_parse_public_s3n_bucket(self):
         tm.assert_frame_equal(read_csv(
             tm.get_data_path('tips.csv')).iloc[:10], df)
 
-    @tm.network
     def test_parse_public_s3a_bucket(self):
         # Read from AWS s3 as "s3a" URL
         df = read_csv('s3a://pandas-test/tips.csv', nrows=10)
@@ -85,7 +86,6 @@ def test_parse_public_s3a_bucket(self):
         tm.assert_frame_equal(read_csv(
             tm.get_data_path('tips.csv')).iloc[:10], df)
 
-    @tm.network
     def test_parse_public_s3_bucket_nrows(self):
         for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
             df = read_csv('s3://pandas-test/tips.csv' +
@@ -95,7 +95,6 @@ def test_parse_public_s3_bucket_nrows(self):
         tm.assert_frame_equal(read_csv(
             tm.get_data_path('tips.csv')).iloc[:10], df)
 
-    @tm.network
     def test_parse_public_s3_bucket_chunked(self):
         # Read with a chunksize
         chunksize = 5
@@ -114,7 +113,6 @@ def test_parse_public_s3_bucket_chunked(self):
                     chunksize * i_chunk: chunksize * (i_chunk + 1)]
                 tm.assert_frame_equal(true_df, df)
 
-    @tm.network
     def test_parse_public_s3_bucket_chunked_python(self):
         # Read with a chunksize using the Python parser
         chunksize = 5
@@ -133,7 +131,6 @@ def test_parse_public_s3_bucket_chunked_python(self):
                     chunksize * i_chunk: chunksize * (i_chunk + 1)]
                 tm.assert_frame_equal(true_df, df)
 
-    @tm.network
     def test_parse_public_s3_bucket_python(self):
         for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
             df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
@@ -143,7 +140,6 @@ def test_parse_public_s3_bucket_python(self):
             tm.assert_frame_equal(read_csv(
                 tm.get_data_path('tips.csv')), df)
 
-    @tm.network
     def test_infer_s3_compression(self):
         for ext in ['', '.gz', '.bz2']:
             df = read_csv('s3://pandas-test/tips.csv' + ext,
@@ -153,7 +149,6 @@ def test_infer_s3_compression(self):
             tm.assert_frame_equal(read_csv(
                 tm.get_data_path('tips.csv')), df)
 
-    @tm.network
     def test_parse_public_s3_bucket_nrows_python(self):
         for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
             df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
@@ -163,7 +158,6 @@ def test_parse_public_s3_bucket_nrows_python(self):
             tm.assert_frame_equal(read_csv(
                 tm.get_data_path('tips.csv')).iloc[:10], df)
 
-    @tm.network
     def test_s3_fails(self):
         with pytest.raises(IOError):
             read_csv('s3://nyqpug/asdf.csv')
@@ -188,3 +182,22 @@ def test_read_csv_handles_boto_s3_object(self,
 
         expected = read_csv(tips_file)
         tm.assert_frame_equal(result, expected)
+
+    def test_read_csv_chunked_download(self, s3_resource, caplog):
+        # 8 MB, S3FS uses 5MB chunks
+        df = DataFrame(np.random.randn(100000, 4), columns=list('abcd'))
+        buf = BytesIO()
+        str_buf = StringIO()
+
+        df.to_csv(str_buf)
+
+        buf = BytesIO(str_buf.getvalue().encode('utf-8'))
+
+        s3_resource.Bucket("pandas-test").put_object(
+            Key="large-file.csv",
+            Body=buf)
+
+        with caplog.at_level(logging.DEBUG, logger='s3fs.core'):
+            read_csv("s3://pandas-test/large-file.csv", nrows=5)
+            # log of fetch_range (start, stop)
+            assert ((0, 5505024) in set(x.args[-2:] for x in caplog.records))
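
The closing assertion pins down the point of the new test: reading only 5 rows should trigger a single ranged fetch of bytes (0, 5505024), which looks like s3fs's 5 MB default block (5242880 bytes) plus a small read-ahead, rather than a download of the whole ~8 MB file. The caplog pattern used there can be exercised on its own; in this sketch the log message is invented, since the test above only inspects the last two positional args of each captured record:

import logging

def test_fetch_range_is_logged(caplog):
    logger = logging.getLogger("s3fs.core")
    with caplog.at_level(logging.DEBUG, logger="s3fs.core"):
        # Stand-in for the DEBUG record s3fs emits per range request;
        # the real message format may differ.
        logger.debug("fetch range %s-%s", 0, 5505024)
    assert (0, 5505024) in set(r.args[-2:] for r in caplog.records)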
