Skip to content

Commit dcb02d2

Browse files
committed
COMPAT: boto import issues
1 parent 43989fd commit dcb02d2

File tree

4 files changed

+130
-108
lines changed

4 files changed

+130
-108
lines changed

pandas/io/common.py

+4-105
Original file line numberDiff line numberDiff line change
@@ -104,85 +104,6 @@ def __next__(self):
104104
BaseIterator.next = lambda self: self.__next__()
105105

106106

107-
try:
    from boto.s3 import key

    class BotoFileLikeReader(key.Key):
        """boto Key modified to be more file-like

        This modification of the boto Key will read through a supplied
        S3 key once, then stop. The unmodified boto Key object will
        repeatedly cycle through a file in S3: after reaching the end of
        the file, boto will close the file. Then the next call to `read`
        or `next` will re-open the file and start reading from the
        beginning.

        Also adds a `readline` function which will split the returned
        values by the newline character.

        Parameters
        ----------
        *args, **kwargs
            Passed through to ``key.Key``, except for the optional
            ``encoding`` keyword, which is consumed here. When no
            encoding is given on Python 3, "utf-8" is used.
        """

        def __init__(self, *args, **kwargs):
            # Local import so this block does not rely on a module-level
            # import being present.
            import codecs

            encoding = kwargs.pop("encoding", None)  # Python 2 compat
            super(BotoFileLikeReader, self).__init__(*args, **kwargs)
            # Add a flag to mark the end of the read.
            self.finished_read = False
            # Text received so far that is not yet terminated by "\n".
            self.buffer = ""
            # Complete lines that are waiting to be handed out.
            self.lines = []
            if encoding is None and compat.PY3:
                encoding = "utf-8"
            self.encoding = encoding
            # An incremental decoder keeps the trailing bytes of a
            # multi-byte character that was cut by the 8192-byte chunk
            # boundary instead of failing on them.
            if encoding:
                self._decoder = codecs.getincrementaldecoder(encoding)()
            else:
                self._decoder = None

        def next(self):
            """Return the next line; raises StopIteration when exhausted."""
            return self.readline()

        __next__ = next

        def read(self, *args, **kwargs):
            """Read from the key, returning an empty value once closed."""
            if self.finished_read:
                return b'' if compat.PY3 else ''
            return super(BotoFileLikeReader, self).read(*args, **kwargs)

        def close(self, *args, **kwargs):
            """Flag the read as finished, then close the underlying key."""
            self.finished_read = True
            return super(BotoFileLikeReader, self).close(*args, **kwargs)

        def seekable(self):
            """Needed for reading by bz2"""
            return False

        def readline(self):
            """Split the contents of the Key by '\\n' characters.

            Returns the next line without its trailing newline. Once the
            key has been closed, any unterminated remainder is returned
            as a final line, after which StopIteration is raised.
            """
            # Loop instead of recursing: a long run of data without a
            # newline would otherwise add one stack frame per chunk.
            while True:
                if self.lines:
                    return self.lines.pop(0)
                if self.finished_read:
                    if self.buffer:
                        retval, self.buffer = self.buffer, ""
                        return retval
                    raise StopIteration

                chunk = self.read(8192)
                if self._decoder is not None:
                    # An empty chunk is the last one, so flush the
                    # decoder; a truncated character then raises.
                    chunk = self._decoder.decode(chunk, final=not chunk)
                self.buffer = "{}{}".format(self.buffer, chunk)

                split_buffer = self.buffer.split("\n")
                self.lines.extend(split_buffer[:-1])
                self.buffer = split_buffer[-1]
except ImportError:
    # boto is only needed for reading from S3.
    pass
except TypeError:
    # boto/boto3 issues
    # GH11915
    pass
184-
185-
186107
def _is_url(url):
187108
"""Check to see if a URL has a valid protocol.
188109
@@ -319,32 +240,10 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
319240
return tuple(to_return)
320241

321242
if _is_s3_url(filepath_or_buffer):
322-
try:
323-
import boto
324-
except:
325-
raise ImportError("boto is required to handle s3 files")
326-
# Assuming AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_S3_HOST
327-
# are environment variables
328-
parsed_url = parse_url(filepath_or_buffer)
329-
s3_host = os.environ.get('AWS_S3_HOST', 's3.amazonaws.com')
330-
331-
try:
332-
conn = boto.connect_s3(host=s3_host)
333-
except boto.exception.NoAuthHandlerFound:
334-
conn = boto.connect_s3(host=s3_host, anon=True)
335-
336-
b = conn.get_bucket(parsed_url.netloc, validate=False)
337-
if compat.PY2 and (compression == 'gzip' or
338-
(compression == 'infer' and
339-
filepath_or_buffer.endswith(".gz"))):
340-
k = boto.s3.key.Key(b, parsed_url.path)
341-
filepath_or_buffer = BytesIO(k.get_contents_as_string(
342-
encoding=encoding))
343-
else:
344-
k = BotoFileLikeReader(b, parsed_url.path, encoding=encoding)
345-
k.open('r') # Expose read errors immediately
346-
filepath_or_buffer = k
347-
return filepath_or_buffer, None, compression
243+
from pandas.io.s3 import get_filepath_or_buffer
244+
return get_filepath_or_buffer(filepath_or_buffer,
245+
encoding=encoding,
246+
compression=compression)
348247

349248
# It is a pathlib.Path/py.path.local or string
350249
filepath_or_buffer = _stringify_path(filepath_or_buffer)

pandas/io/s3.py

+112
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
""" s3 support for remote file interactivity """
2+
3+
import os
4+
from pandas import compat
5+
from pandas.compat import BytesIO
6+
7+
try:
    import boto
    from boto.s3 import key
except Exception:
    # Any failure while importing boto is reported as a missing
    # dependency. ``Exception`` rather than a bare ``except`` so that
    # KeyboardInterrupt and SystemExit are not turned into ImportError.
    raise ImportError("boto is required to handle s3 files")
12+
13+
if compat.PY3:
14+
from urllib.parse import urlparse as parse_url
15+
else:
16+
from urlparse import urlparse as parse_url
17+
18+
19+
class BotoFileLikeReader(key.Key):
    """boto Key modified to be more file-like

    This modification of the boto Key will read through a supplied
    S3 key once, then stop. The unmodified boto Key object will repeatedly
    cycle through a file in S3: after reaching the end of the file,
    boto will close the file. Then the next call to `read` or `next` will
    re-open the file and start reading from the beginning.

    Also adds a `readline` function which will split the returned
    values by the newline character.

    Parameters
    ----------
    *args, **kwargs
        Passed through to ``key.Key``, except for the optional
        ``encoding`` keyword, which is consumed here. When no encoding
        is given on Python 3, "utf-8" is used.
    """

    def __init__(self, *args, **kwargs):
        # Local import so this block does not rely on a module-level
        # import being present.
        import codecs

        encoding = kwargs.pop("encoding", None)  # Python 2 compat
        super(BotoFileLikeReader, self).__init__(*args, **kwargs)
        # Add a flag to mark the end of the read.
        self.finished_read = False
        # Text received so far that is not yet terminated by "\n".
        self.buffer = ""
        # Complete lines that are waiting to be handed out.
        self.lines = []
        if encoding is None and compat.PY3:
            encoding = "utf-8"
        self.encoding = encoding
        # An incremental decoder keeps the trailing bytes of a multi-byte
        # character that was cut by the 8192-byte chunk boundary instead
        # of failing on them.
        if encoding:
            self._decoder = codecs.getincrementaldecoder(encoding)()
        else:
            self._decoder = None

    def next(self):
        """Return the next line; raises StopIteration when exhausted."""
        return self.readline()

    __next__ = next

    def read(self, *args, **kwargs):
        """Read from the key, returning an empty value once closed."""
        if self.finished_read:
            return b'' if compat.PY3 else ''
        return super(BotoFileLikeReader, self).read(*args, **kwargs)

    def close(self, *args, **kwargs):
        """Flag the read as finished, then close the underlying key."""
        self.finished_read = True
        return super(BotoFileLikeReader, self).close(*args, **kwargs)

    def seekable(self):
        """Needed for reading by bz2"""
        return False

    def readline(self):
        """Split the contents of the Key by '\\n' characters.

        Returns the next line without its trailing newline. Once the key
        has been closed, any unterminated remainder is returned as a
        final line, after which StopIteration is raised.
        """
        # Loop instead of recursing: a long run of data without a newline
        # would otherwise add one stack frame per 8192-byte chunk.
        while True:
            if self.lines:
                return self.lines.pop(0)
            if self.finished_read:
                if self.buffer:
                    retval, self.buffer = self.buffer, ""
                    return retval
                raise StopIteration

            chunk = self.read(8192)
            if self._decoder is not None:
                # An empty chunk is the last one, so flush the decoder;
                # a truncated character then raises.
                chunk = self._decoder.decode(chunk, final=not chunk)
            self.buffer = "{}{}".format(self.buffer, chunk)

            split_buffer = self.buffer.split("\n")
            self.lines.extend(split_buffer[:-1])
            self.buffer = split_buffer[-1]
86+
87+
88+
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                           compression=None):
    """Open an S3 URL and return a readable object for its contents.

    The S3 host is taken from the ``AWS_S3_HOST`` environment variable
    (default ``s3.amazonaws.com``). AWS_ACCESS_KEY_ID and
    AWS_SECRET_ACCESS_KEY are assumed to be environment variables as
    well; if boto reports ``NoAuthHandlerFound`` the connection is
    retried anonymously.

    Parameters
    ----------
    filepath_or_buffer : str
        S3 URL; its network location is used as the bucket name and its
        path as the key name.
    encoding : str, optional
        Passed on to boto / ``BotoFileLikeReader``.
    compression : str, optional
        Returned unchanged. On Python 2, 'gzip' (or 'infer' with a URL
        ending in ".gz") makes the whole key get downloaded into memory.

    Returns
    -------
    tuple
        ``(reader, None, compression)`` where ``reader`` is a ``BytesIO``
        in the Python 2 gzip case and an opened ``BotoFileLikeReader``
        otherwise.
    """
    url_parts = parse_url(filepath_or_buffer)
    host = os.environ.get('AWS_S3_HOST', 's3.amazonaws.com')

    try:
        connection = boto.connect_s3(host=host)
    except boto.exception.NoAuthHandlerFound:
        connection = boto.connect_s3(host=host, anon=True)

    bucket = connection.get_bucket(url_parts.netloc, validate=False)

    if compat.PY2 and (compression == 'gzip' or
                       (compression == 'infer' and
                        filepath_or_buffer.endswith(".gz"))):
        # Python 2 gzip case: fetch the complete payload up front.
        whole_key = boto.s3.key.Key(bucket, url_parts.path)
        payload = whole_key.get_contents_as_string(encoding=encoding)
        return BytesIO(payload), None, compression

    reader = BotoFileLikeReader(bucket, url_parts.path, encoding=encoding)
    reader.open('r')  # Expose read errors immediately
    return reader, None, compression

pandas/io/tests/test_data.py

-3
Original file line numberDiff line numberDiff line change
@@ -472,9 +472,6 @@ def test_options_source_warning(self):
472472

473473

474474
class TestDataReader(tm.TestCase):
475-
def test_is_s3_url(self):
476-
from pandas.io.common import _is_s3_url
477-
self.assertTrue(_is_s3_url("s3://pandas/somethingelse.com"))
478475

479476
@network
480477
def test_read_yahoo(self):

pandas/io/tests/test_s3.py

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import nose
2+
from pandas.util import testing as tm
3+
4+
from pandas.io.common import _is_s3_url
5+
6+
7+
class TestS3URL(tm.TestCase):
    """Tests for ``_is_s3_url`` from ``pandas.io.common``."""

    def test_is_s3_url(self):
        # An ``s3://`` URL must be accepted, while the look-alike
        # ``s4://`` scheme must be rejected.
        s3_location = "s3://pandas/somethingelse.com"
        other_location = "s4://pandas/somethingelse.com"

        self.assertTrue(_is_s3_url(s3_location))
        self.assertFalse(_is_s3_url(other_location))
11+
12+
if __name__ == '__main__':
    # Allow this test module to be run directly through nose.
    nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_argv, exit=False)

0 commit comments

Comments
 (0)