Skip to content

Commit 7d2022a

Browse files
committed
Merge pull request #11714 from parsleyt/excel-s3
ENH: Add support for s3 in read_excel #11447
2 parents 547750a + f4d95b4 commit 7d2022a

File tree

3 files changed

+22
-2
lines changed

3 files changed

+22
-2
lines changed

doc/source/whatsnew/v0.18.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@ Other enhancements
3232
^^^^^^^^^^^^^^^^^^
3333

3434
- Handle truncated floats in SAS xport files (:issue:`11713`)
35+
- ``read_excel`` now supports s3 urls of the format ``s3://bucketname/filename`` (:issue:`11447`)
3536

3637
.. _whatsnew_0180.enhancements.rounding:
3738

pandas/io/excel.py

+5-2
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111

1212
from pandas.core.frame import DataFrame
1313
from pandas.io.parsers import TextParser
14-
from pandas.io.common import _is_url, _urlopen, _validate_header_arg
14+
from pandas.io.common import _is_url, _urlopen, _validate_header_arg, get_filepath_or_buffer, _is_s3_url
1515
from pandas.tseries.period import Period
1616
from pandas import json
1717
from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass,
@@ -199,7 +199,10 @@ def __init__(self, io, **kwds):
199199
raise ValueError("Unknown engine: %s" % engine)
200200

201201
if isinstance(io, compat.string_types):
202-
if _is_url(io):
202+
if _is_s3_url(io):
203+
buffer, _, _ = get_filepath_or_buffer(io)
204+
self.book = xlrd.open_workbook(file_contents=buffer.read())
205+
elif _is_url(io):
203206
data = _urlopen(io).read()
204207
self.book = xlrd.open_workbook(file_contents=data)
205208
else:

pandas/io/tests/test_excel.py

+16
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,13 @@ def _skip_if_no_excelsuite():
6565
_skip_if_no_openpyxl()
6666

6767

68+
def _skip_if_no_boto():
69+
try:
70+
import boto # NOQA
71+
except ImportError:
72+
raise nose.SkipTest('boto not installed, skipping')
73+
74+
6875
_seriesd = tm.getSeriesData()
6976
_tsd = tm.getTimeSeriesData()
7077
_frame = DataFrame(_seriesd)[:10]
@@ -429,6 +436,15 @@ def test_read_from_http_url(self):
429436
local_table = self.get_exceldf('test1')
430437
tm.assert_frame_equal(url_table, local_table)
431438

439+
@tm.network(check_before_test=True)
440+
def test_read_from_s3_url(self):
441+
_skip_if_no_boto()
442+
443+
url = ('s3://pandas-test/test1' + self.ext)
444+
url_table = read_excel(url)
445+
local_table = self.get_exceldf('test1')
446+
tm.assert_frame_equal(url_table, local_table)
447+
432448
@slow
433449
def test_read_from_file_url(self):
434450

0 commit comments

Comments
 (0)