Skip to content

Commit ad8a189

Browse files
committed
COMPAT: make read_excel accept path objects as filepath
Convert path objects to string before attempting to read. Update docstrings for read_excel and ExcelFile. Add fix to release notes. PEP8 formatting. Just stringify path objects using existing function. Condense logic for different file/path types.
1 parent 22cf50b commit ad8a189

File tree

3 files changed

+45
-14
lines changed

3 files changed

+45
-14
lines changed

doc/source/whatsnew/v0.18.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -169,3 +169,4 @@ Bug Fixes
169169
- Bug in ``pivot_table`` when ``margins=True`` and ``dropna=True`` where nulls still contributed to margin count (:issue:`12577`)
170170
- Bug in ``Series.name`` when ``name`` attribute can be a hashable type (:issue:`12610`)
171171
- Bug in ``.describe()`` resets categorical columns information (:issue:`11558`)
172+
- ``read_excel`` now accepts path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path, in line with other ``read_*`` functions (:issue:`12655`)

pandas/io/excel.py

+17-14
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from pandas.core.frame import DataFrame
1414
from pandas.io.parsers import TextParser
1515
from pandas.io.common import (_is_url, _urlopen, _validate_header_arg,
16-
get_filepath_or_buffer, _is_s3_url)
16+
get_filepath_or_buffer)
1717
from pandas.tseries.period import Period
1818
from pandas import json
1919
from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass,
@@ -82,7 +82,8 @@ def read_excel(io, sheetname=0, header=0, skiprows=None, skip_footer=0,
8282
8383
Parameters
8484
----------
85-
io : string, file-like object, pandas ExcelFile, or xlrd workbook.
85+
io : string, path object (pathlib.Path or py._path.local.LocalPath),
86+
file-like object, pandas ExcelFile, or xlrd workbook.
8687
The string could be a URL. Valid URL schemes include http, ftp, s3,
8788
and file. For file URLs, a host is expected. For instance, a local
8889
file could be file://localhost/path/to/workbook.xlsx
@@ -184,8 +185,9 @@ class ExcelFile(object):
184185
185186
Parameters
186187
----------
187-
io : string, file-like object or xlrd workbook
188-
If a string, expected to be a path to xls or xlsx file
188+
io : string, path object (pathlib.Path or py._path.local.LocalPath),
189+
file-like object or xlrd workbook
190+
If a string or path object, expected to be a path to xls or xlsx file
189191
engine: string, default None
190192
If io is not a buffer or path, this must be set to identify io.
191193
Acceptable values are None or xlrd
@@ -207,21 +209,22 @@ def __init__(self, io, **kwds):
207209
if engine is not None and engine != 'xlrd':
208210
raise ValueError("Unknown engine: %s" % engine)
209211

210-
if isinstance(io, compat.string_types):
211-
if _is_s3_url(io):
212-
buffer, _, _ = get_filepath_or_buffer(io)
213-
self.book = xlrd.open_workbook(file_contents=buffer.read())
214-
elif _is_url(io):
215-
data = _urlopen(io).read()
216-
self.book = xlrd.open_workbook(file_contents=data)
217-
else:
218-
self.book = xlrd.open_workbook(io)
219-
elif engine == 'xlrd' and isinstance(io, xlrd.Book):
212+
# If io is a url, want to keep the data as bytes so can't pass
213+
# to get_filepath_or_buffer()
214+
if _is_url(io):
215+
io = _urlopen(io)
216+
# Deal with S3 urls, path objects, etc. Will convert them to
217+
# buffer or path string
218+
io, _, _ = get_filepath_or_buffer(io)
219+
220+
if engine == 'xlrd' and isinstance(io, xlrd.Book):
220221
self.book = io
221222
elif not isinstance(io, xlrd.Book) and hasattr(io, "read"):
222223
# N.B. xlrd.Book has a read attribute too
223224
data = io.read()
224225
self.book = xlrd.open_workbook(file_contents=data)
226+
elif isinstance(io, compat.string_types):
227+
self.book = xlrd.open_workbook(io)
225228
else:
226229
raise ValueError('Must explicitly set engine if not passing in'
227230
' buffer or path for io.')

pandas/io/tests/test_excel.py

+27
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,33 @@ def test_read_from_file_url(self):
528528

529529
tm.assert_frame_equal(url_table, local_table)
530530

531+
def test_read_from_pathlib_path(self):
532+
tm._skip_if_no_pathlib()
533+
534+
from pathlib import Path
535+
536+
str_path = os.path.join(self.dirpath, 'test1' + self.ext)
537+
expected = read_excel(str_path, 'Sheet1', index_col=0)
538+
539+
path_obj = Path(self.dirpath, 'test1' + self.ext)
540+
actual = read_excel(path_obj, 'Sheet1', index_col=0)
541+
542+
tm.assert_frame_equal(expected, actual)
543+
544+
def test_read_from_py_localpath(self):
545+
tm._skip_if_no_localpath()
546+
547+
from py.path import local as LocalPath
548+
549+
str_path = os.path.join(self.dirpath, 'test1' + self.ext)
550+
expected = read_excel(str_path, 'Sheet1', index_col=0)
551+
552+
abs_dir = os.path.abspath(self.dirpath)
553+
path_obj = LocalPath(abs_dir).join('test1' + self.ext)
554+
actual = read_excel(path_obj, 'Sheet1', index_col=0)
555+
556+
tm.assert_frame_equal(expected, actual)
557+
531558
def test_reader_closes_file(self):
532559

533560
pth = os.path.join(self.dirpath, 'test1' + self.ext)

0 commit comments

Comments
 (0)