4
4
Tests the parsers' ability to read and parse non-local files
5
5
that require a network connection to be read.
6
6
"""
7
-
8
7
import os
8
+
9
9
import pytest
10
+ import moto
10
11
11
12
import pandas .util .testing as tm
12
13
from pandas import DataFrame
13
14
from pandas .io .parsers import read_csv , read_table
15
+ from pandas .compat import BytesIO
16
+
17
+
18
@pytest.fixture(scope='module')
def tips_file():
    """Return the path of the local ``tips.csv`` file in the test data dir."""
    data_dir = tm.get_data_path()
    return os.path.join(data_dir, 'tips.csv')
14
21
15
22
16
23
@pytest .fixture (scope = 'module' )
@@ -19,6 +26,40 @@ def salaries_table():
19
26
return read_table (path )
20
27
21
28
29
@pytest.fixture(scope='module')
def test_s3_resource(tips_file):
    """
    Yield a boto3 S3 resource while moto's S3 mock is active.

    Two buckets are created and filled with ``tips.csv`` plus its ``.gz``
    and ``.bz2`` variants (located next to ``tips_file``):

    * ``pandas-test``
    * ``cant_get_it`` (created with ``ACL='private'``)

    Tests using this fixture are skipped when ``s3fs`` or ``boto3`` cannot
    be imported.

    Parameters
    ----------
    tips_file : str
        Path to the uncompressed ``tips.csv`` data file.

    Yields
    ------
    conn
        The object returned by ``boto3.resource("s3", ...)``.
    """
    # Check the optional dependencies *before* starting the mock, so that a
    # skip can never leave the S3 mock switched on for later tests.
    pytest.importorskip('s3fs')
    boto3 = pytest.importorskip('boto3')

    test_s3_files = [
        ('tips.csv', tips_file),
        ('tips.csv.gz', tips_file + '.gz'),
        ('tips.csv.bz2', tips_file + '.bz2'),
    ]

    def add_tips_files(bucket_name):
        # Upload every tips file variant into ``bucket_name``.
        for s3_key, file_name in test_s3_files:
            with open(file_name, 'rb') as f:
                conn.Bucket(bucket_name).put_object(
                    Key=s3_key,
                    Body=f)

    # Keep a handle on the mock: the object that is stopped must be the very
    # same one that was started, not a freshly constructed ``mock_s3()``.
    s3_mock = moto.mock_s3()
    s3_mock.start()
    try:
        # see gh-16135
        bucket = 'pandas-test'

        conn = boto3.resource("s3", region_name="us-east-1")
        conn.create_bucket(Bucket=bucket)
        add_tips_files(bucket)

        conn.create_bucket(Bucket='cant_get_it', ACL='private')
        add_tips_files('cant_get_it')

        yield conn
    finally:
        # Runs on normal teardown and also when bucket setup raises.
        s3_mock.stop()
61
+
62
+
22
63
@pytest .mark .network
23
64
@pytest .mark .parametrize (
24
65
"compression,extension" ,
@@ -51,15 +92,11 @@ def check_compressed_urls(salaries_table, compression, extension, mode,
51
92
52
93
53
94
class TestS3 (object ):
54
-
55
- def setup_method (self , method ):
56
- try :
57
- import s3fs # noqa
58
- except ImportError :
59
- pytest .skip ("s3fs not installed" )
60
-
61
95
@tm .network
62
96
def test_parse_public_s3_bucket (self ):
97
+ pytest .importorskip ('s3fs' )
98
+ # more of an integration test due to the not-public contents portion
99
+ # can probably mock this though.
63
100
for ext , comp in [('' , None ), ('.gz' , 'gzip' ), ('.bz2' , 'bz2' )]:
64
101
df = read_csv ('s3://pandas-test/tips.csv' +
65
102
ext , compression = comp )
@@ -74,26 +111,24 @@ def test_parse_public_s3_bucket(self):
74
111
assert not df .empty
75
112
tm .assert_frame_equal (read_csv (tm .get_data_path ('tips.csv' )), df )
76
113
77
- @ tm . network
78
- def test_parse_public_s3n_bucket ( self ):
114
def test_parse_public_s3n_bucket(self, test_s3_resource):
    """Read the first ten rows of ``tips.csv`` through an "s3n" URL."""
    result = read_csv('s3n://pandas-test/tips.csv', nrows=10)

    assert isinstance(result, DataFrame)
    assert not result.empty

    # The rows read from the bucket must equal the head of the local file.
    local_tips = read_csv(tm.get_data_path('tips.csv'))
    expected = local_tips.iloc[:10]
    tm.assert_frame_equal(expected, result)
85
122
86
- @tm .network
87
- def test_parse_public_s3a_bucket (self ):
123
def test_parse_public_s3a_bucket(self, test_s3_resource):
    """Read the first ten rows of ``tips.csv`` through an "s3a" URL."""
    result = read_csv('s3a://pandas-test/tips.csv', nrows=10)

    assert isinstance(result, DataFrame)
    assert not result.empty

    # The rows read from the bucket must equal the head of the local file.
    local_tips = read_csv(tm.get_data_path('tips.csv'))
    expected = local_tips.iloc[:10]
    tm.assert_frame_equal(expected, result)
94
130
95
- @tm .network
96
- def test_parse_public_s3_bucket_nrows (self ):
131
+ def test_parse_public_s3_bucket_nrows (self , test_s3_resource ):
97
132
for ext , comp in [('' , None ), ('.gz' , 'gzip' ), ('.bz2' , 'bz2' )]:
98
133
df = read_csv ('s3://pandas-test/tips.csv' +
99
134
ext , nrows = 10 , compression = comp )
@@ -102,8 +137,7 @@ def test_parse_public_s3_bucket_nrows(self):
102
137
tm .assert_frame_equal (read_csv (
103
138
tm .get_data_path ('tips.csv' )).iloc [:10 ], df )
104
139
105
- @tm .network
106
- def test_parse_public_s3_bucket_chunked (self ):
140
+ def test_parse_public_s3_bucket_chunked (self , test_s3_resource ):
107
141
# Read with a chunksize
108
142
chunksize = 5
109
143
local_tips = read_csv (tm .get_data_path ('tips.csv' ))
@@ -121,8 +155,7 @@ def test_parse_public_s3_bucket_chunked(self):
121
155
chunksize * i_chunk : chunksize * (i_chunk + 1 )]
122
156
tm .assert_frame_equal (true_df , df )
123
157
124
- @tm .network
125
- def test_parse_public_s3_bucket_chunked_python (self ):
158
+ def test_parse_public_s3_bucket_chunked_python (self , test_s3_resource ):
126
159
# Read with a chunksize using the Python parser
127
160
chunksize = 5
128
161
local_tips = read_csv (tm .get_data_path ('tips.csv' ))
@@ -140,8 +173,7 @@ def test_parse_public_s3_bucket_chunked_python(self):
140
173
chunksize * i_chunk : chunksize * (i_chunk + 1 )]
141
174
tm .assert_frame_equal (true_df , df )
142
175
143
- @tm .network
144
- def test_parse_public_s3_bucket_python (self ):
176
+ def test_parse_public_s3_bucket_python (self , test_s3_resource ):
145
177
for ext , comp in [('' , None ), ('.gz' , 'gzip' ), ('.bz2' , 'bz2' )]:
146
178
df = read_csv ('s3://pandas-test/tips.csv' + ext , engine = 'python' ,
147
179
compression = comp )
@@ -150,8 +182,7 @@ def test_parse_public_s3_bucket_python(self):
150
182
tm .assert_frame_equal (read_csv (
151
183
tm .get_data_path ('tips.csv' )), df )
152
184
153
- @tm .network
154
- def test_infer_s3_compression (self ):
185
+ def test_infer_s3_compression (self , test_s3_resource ):
155
186
for ext in ['' , '.gz' , '.bz2' ]:
156
187
df = read_csv ('s3://pandas-test/tips.csv' + ext ,
157
188
engine = 'python' , compression = 'infer' )
@@ -160,8 +191,7 @@ def test_infer_s3_compression(self):
160
191
tm .assert_frame_equal (read_csv (
161
192
tm .get_data_path ('tips.csv' )), df )
162
193
163
- @tm .network
164
- def test_parse_public_s3_bucket_nrows_python (self ):
194
+ def test_parse_public_s3_bucket_nrows_python (self , test_s3_resource ):
165
195
for ext , comp in [('' , None ), ('.gz' , 'gzip' ), ('.bz2' , 'bz2' )]:
166
196
df = read_csv ('s3://pandas-test/tips.csv' + ext , engine = 'python' ,
167
197
nrows = 10 , compression = comp )
@@ -170,8 +200,7 @@ def test_parse_public_s3_bucket_nrows_python(self):
170
200
tm .assert_frame_equal (read_csv (
171
201
tm .get_data_path ('tips.csv' )).iloc [:10 ], df )
172
202
173
- @tm .network
174
- def test_s3_fails (self ):
203
+ def test_s3_fails (self , test_s3_resource ):
175
204
with pytest .raises (IOError ):
176
205
read_csv ('s3://nyqpug/asdf.csv' )
177
206
@@ -180,21 +209,18 @@ def test_s3_fails(self):
180
209
with pytest .raises (IOError ):
181
210
read_csv ('s3://cant_get_it/' )
182
211
183
- @tm .network
184
- def boto3_client_s3 (self ):
212
def test_read_csv_handles_boto_s3_object(self, test_s3_resource, tips_file):
    """Parse the bytes of a boto ``get_object`` response body with read_csv.

    The result must match ``tips_file`` read directly from disk.
    """
    # see gh-16135
    client = test_s3_resource.meta.client
    response = client.get_object(Bucket='pandas-test', Key='tips.csv')

    # Read the whole response body and hand it to the parser as a buffer.
    payload = response["Body"].read()
    result = read_csv(BytesIO(payload), encoding='utf8')

    assert isinstance(result, DataFrame)
    assert not result.empty

    expected = read_csv(tips_file)
    tm.assert_frame_equal(result, expected)
0 commit comments