Skip to content

Commit 0fbd347

Browse files
committed
Merge pull request #4962 from cancan101/excel_take_workbook
ENH Change ExcelFile to accept a workbook for the path_or_buf argument.
2 parents ed030e9 + 9103322 commit 0fbd347

File tree

3 files changed

+59
-17
lines changed

3 files changed

+59
-17
lines changed

doc/source/release.rst

+4-1
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,10 @@ Improvements to existing features
133133
(0.4.3 and 0.5.0) (:issue:`4981`).
134134
- Better string representations of ``MultiIndex`` (including ability to roundtrip
135135
via ``repr``). (:issue:`3347`, :issue:`4935`)
136-
136+
- Both ``ExcelFile`` and ``read_excel`` now accept an ``xlrd.Book`` for the ``io``
137+
(formerly ``path_or_buf``) argument; this requires ``engine`` to be set.
138+
(:issue:`4961`).
139+
137140
API Changes
138141
~~~~~~~~~~~
139142

pandas/io/excel.py

+35-16
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,13 @@ def get_writer(engine_name):
4545
except KeyError:
4646
raise ValueError("No Excel writer '%s'" % engine_name)
4747

48-
def read_excel(path_or_buf, sheetname, **kwds):
48+
def read_excel(io, sheetname, **kwds):
4949
"""Read an Excel table into a pandas DataFrame
5050
5151
Parameters
5252
----------
53+
io : string, file-like object or xlrd workbook
54+
If a string, expected to be a path to xls or xlsx file
5355
sheetname : string
5456
Name of Excel sheet
5557
header : int, default 0
@@ -74,7 +76,10 @@ def read_excel(path_or_buf, sheetname, **kwds):
7476
values are overridden, otherwise they're appended to
7577
verbose : boolean, default False
7678
Indicate number of NA values placed in non-numeric columns
77-
79+
engine : string, default None
80+
If io is not a buffer or path, this must be set to identify io.
81+
Acceptable values are None or xlrd
82+
7883
Returns
7984
-------
8085
parsed : DataFrame
@@ -84,7 +89,10 @@ def read_excel(path_or_buf, sheetname, **kwds):
8489
kwds.pop('kind')
8590
warn("kind keyword is no longer supported in read_excel and may be "
8691
"removed in a future version", FutureWarning)
87-
return ExcelFile(path_or_buf).parse(sheetname=sheetname, **kwds)
92+
93+
engine = kwds.pop('engine', None)
94+
95+
return ExcelFile(io, engine=engine).parse(sheetname=sheetname, **kwds)
8896

8997

9098
class ExcelFile(object):
@@ -94,10 +102,13 @@ class ExcelFile(object):
94102
95103
Parameters
96104
----------
97-
path : string or file-like object
98-
Path to xls or xlsx file
105+
io : string, file-like object or xlrd workbook
106+
If a string, expected to be a path to xls or xlsx file
107+
engine : string, default None
108+
If io is not a buffer or path, this must be set to identify io.
109+
Acceptable values are None or xlrd
99110
"""
100-
def __init__(self, path_or_buf, **kwds):
111+
def __init__(self, io, **kwds):
101112

102113
import xlrd # throw an ImportError if we need to
103114

@@ -106,14 +117,22 @@ def __init__(self, path_or_buf, **kwds):
106117
raise ImportError("pandas requires xlrd >= 0.9.0 for excel "
107118
"support, current version " + xlrd.__VERSION__)
108119

109-
self.path_or_buf = path_or_buf
110-
self.tmpfile = None
111-
112-
if isinstance(path_or_buf, compat.string_types):
113-
self.book = xlrd.open_workbook(path_or_buf)
114-
else:
115-
data = path_or_buf.read()
120+
self.io = io
121+
122+
engine = kwds.pop('engine', None)
123+
124+
if engine is not None and engine != 'xlrd':
125+
raise ValueError("Unknown engine: %s" % engine)
126+
127+
if isinstance(io, compat.string_types):
128+
self.book = xlrd.open_workbook(io)
129+
elif engine == "xlrd" and isinstance(io, xlrd.Book):
130+
self.book = io
131+
elif hasattr(io, "read"):
132+
data = io.read()
116133
self.book = xlrd.open_workbook(file_contents=data)
134+
else:
135+
raise ValueError('Must explicitly set engine if not passing in buffer or path for io.')
117136

118137
def parse(self, sheetname, header=0, skiprows=None, skip_footer=0,
119138
index_col=None, parse_cols=None, parse_dates=False,
@@ -261,9 +280,9 @@ def sheet_names(self):
261280
return self.book.sheet_names()
262281

263282
def close(self):
264-
"""close path_or_buf if necessary"""
265-
if hasattr(self.path_or_buf, 'close'):
266-
self.path_or_buf.close()
283+
"""close io if necessary"""
284+
if hasattr(self.io, 'close'):
285+
self.io.close()
267286

268287
def __enter__(self):
269288
return self

pandas/io/tests/test_excel.py

+20
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,26 @@ def test_excel_read_buffer(self):
254254
f = open(pth, 'rb')
255255
xl = ExcelFile(f)
256256
xl.parse('Sheet1', index_col=0, parse_dates=True)
257+
258+
def test_read_xlrd_Book(self):
259+
_skip_if_no_xlrd()
260+
_skip_if_no_xlwt()
261+
262+
import xlrd
263+
264+
pth = '__tmp_excel_read_worksheet__.xls'
265+
df = self.frame
266+
267+
with ensure_clean(pth) as pth:
268+
df.to_excel(pth, "SheetA")
269+
book = xlrd.open_workbook(pth)
270+
271+
with ExcelFile(book, engine="xlrd") as xl:
272+
result = xl.parse("SheetA")
273+
tm.assert_frame_equal(df, result)
274+
275+
result = read_excel(book, sheetname="SheetA", engine="xlrd")
276+
tm.assert_frame_equal(df, result)
257277

258278
def test_xlsx_table(self):
259279
_skip_if_no_xlrd()

0 commit comments

Comments
 (0)