Skip to content

Commit d8a0b4f

Browse files
committed
ENH: support file-like objects in ExcelFile, close #1529
1 parent cd2002a commit d8a0b4f

File tree

3 files changed

+68
-26
lines changed

3 files changed

+68
-26
lines changed

RELEASE.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ pandas 0.8.0
136136
- Header argument in DataFrame.to_csv can accept a list of column names to
137137
use instead of the object's columns (#921)
138138
- Add ``raise_conflict`` argument to DataFrame.update (#1526)
139+
- Support file-like objects in ExcelFile (#1529)
139140

140141
**API Changes**
141142

pandas/io/parsers.py

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1128,22 +1128,48 @@ class ExcelFile(object):
11281128
11291129
Parameters
11301130
----------
1131-
path : string
1131+
path : string or file-like object
11321132
Path to an xls or xlsx file, or an open file-like object containing one
1133+
kind : {'xls', 'xlsx', None}, default None
11331134
"""
1134-
def __init__(self, path):
1135+
def __init__(self, path_or_buf):
11351136
self.use_xlsx = True
1136-
if path.endswith('.xls'):
1137-
self.use_xlsx = False
1138-
import xlrd
1139-
self.book = xlrd.open_workbook(path)
1137+
self.path_or_buf = path_or_buf
1138+
self.tmpfile = None
1139+
1140+
if isinstance(path_or_buf, basestring):
1141+
if path_or_buf.endswith('.xls'):
1142+
self.use_xlsx = False
1143+
import xlrd
1144+
self.book = xlrd.open_workbook(path_or_buf)
1145+
else:
1146+
try:
1147+
from openpyxl.reader.excel import load_workbook
1148+
self.book = load_workbook(path_or_buf, use_iterators=True)
1149+
except ImportError: # pragma: no cover
1150+
raise ImportError(_openpyxl_msg)
11401151
else:
1152+
import tempfile
1153+
fd = tempfile.NamedTemporaryFile(delete=False)
1154+
fd.write(path_or_buf.read())
1155+
fd.close()
1156+
11411157
try:
1158+
import xlrd
1159+
self.book = xlrd.open_workbook(fd.name)
1160+
self.use_xlsx = False
1161+
except Exception:
11421162
from openpyxl.reader.excel import load_workbook
1143-
self.book = load_workbook(path, use_iterators=True)
1144-
except ImportError: # pragma: no cover
1145-
raise ImportError(_openpyxl_msg)
1146-
self.path = path
1163+
self.book = load_workbook(fd.name, use_iterators=True)
1164+
1165+
self.tmpfile = fd.name
1166+
1167+
def __name__(self):
1168+
if self.tmpfile:
1169+
try:
1170+
os.remove(self.tmpfile)
1171+
except Exception:
1172+
pass
11471173

11481174
def __repr__(self):
11491175
return object.__repr__(self)

pandas/io/tests/test_parsers.py

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,18 @@
2727
parse_date_time, parse_date_fields, parse_all_fields
2828
)
2929

30+
def _skip_if_no_xlrd():
31+
try:
32+
import xlrd
33+
except ImportError:
34+
raise nose.SkipTest('xlrd not installed, skipping')
35+
36+
def _skip_if_no_openpyxl():
37+
try:
38+
import openpyxl
39+
except ImportError:
40+
raise nose.SkipTest('openpyxl not installed, skipping')
41+
3042

3143
class TestParsers(unittest.TestCase):
3244
data1 = """index,A,B,C,D
@@ -678,32 +690,23 @@ def test_read_csv_no_index_name(self):
678690
assert_frame_equal(df, df2)
679691

680692
def test_excel_stop_iterator(self):
681-
try:
682-
import xlrd
683-
except ImportError:
684-
raise nose.SkipTest('xlrd not installed, skipping')
693+
_skip_if_no_xlrd()
685694

686695
excel_data = ExcelFile(os.path.join(self.dirpath, 'test2.xls'))
687696
parsed = excel_data.parse('Sheet1')
688697
expected = DataFrame([['aaaa','bbbbb']], columns=['Test', 'Test1'])
689698
assert_frame_equal(parsed, expected)
690699

691700
def test_excel_cell_error_na(self):
692-
try:
693-
import xlrd
694-
except ImportError:
695-
raise nose.SkipTest('xlrd not installed, skipping')
701+
_skip_if_no_xlrd()
696702

697703
excel_data = ExcelFile(os.path.join(self.dirpath, 'test3.xls'))
698704
parsed = excel_data.parse('Sheet1')
699705
expected = DataFrame([[np.nan]], columns=['Test'])
700706
assert_frame_equal(parsed, expected)
701707

702708
def test_excel_table(self):
703-
try:
704-
import xlrd
705-
except ImportError:
706-
raise nose.SkipTest('xlrd not installed, skipping')
709+
_skip_if_no_xlrd()
707710

708711
pth = os.path.join(self.dirpath, 'test.xls')
709712
xls = ExcelFile(pth)
@@ -713,11 +716,23 @@ def test_excel_table(self):
713716
assert_frame_equal(df, df2)
714717
assert_frame_equal(df3, df2)
715718

719+
def test_excel_read_buffer(self):
720+
_skip_if_no_xlrd()
721+
_skip_if_no_openpyxl()
722+
723+
pth = os.path.join(self.dirpath, 'test.xls')
724+
f = open(pth, 'rb')
725+
xls = ExcelFile(f)
726+
# it works
727+
xls.parse('Sheet1', index_col=0, parse_dates=True)
728+
729+
pth = os.path.join(self.dirpath, 'test.xlsx')
730+
f = open(pth, 'rb')
731+
xl = ExcelFile(f)
732+
df = xl.parse('Sheet1', index_col=0, parse_dates=True)
733+
716734
def test_xlsx_table(self):
717-
try:
718-
import openpyxl
719-
except ImportError:
720-
raise nose.SkipTest('openpyxl not installed, skipping')
735+
_skip_if_no_openpyxl()
721736

722737
pth = os.path.join(self.dirpath, 'test.xlsx')
723738
xlsx = ExcelFile(pth)

0 commit comments

Comments
 (0)