
Commit 961a314

ENH: Support fspath protocol
Ensures that most pandas readers and writers honor the fspath protocol (PEP 519) when an object defines it.

TST: remove old xfails
1 parent e0bbaff commit 961a314

22 files changed, +165 -54 lines
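As a rough illustration of what the change enables (the `ArchivePath` class and the file name are made up for this sketch; on Python 3.6+ `pathlib.Path` already implements `__fspath__`):

import pandas as pd


class ArchivePath:
    # Hypothetical PEP 519 path-like object; pandas readers/writers
    # should now call __fspath__() to obtain the underlying path string.
    def __init__(self, path):
        self._path = path

    def __fspath__(self):
        return self._path


df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})

# Both calls should behave as if the plain string "data.csv" were passed.
df.to_csv(ArchivePath("data.csv"), index=False)
roundtrip = pd.read_csv(ArchivePath("data.csv"))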

doc/source/whatsnew/v0.21.0.txt

+2 lines

@@ -20,6 +20,8 @@ Check the :ref:`API Changes <whatsnew_0210.api_breaking>` and :ref:`deprecations
 New features
 ~~~~~~~~~~~~
 
+- Support for `PEP 519 -- Adding a file system path protocol <https://www.python.org/dev/peps/pep-0519/>`_ on most readers and writers (:issue:`13823`)
+
 
 
 .. _whatsnew_0210.enhancements.other:

pandas/io/common.py

+17 -8 lines

@@ -146,17 +146,29 @@ def _validate_header_arg(header):
 
 
 def _stringify_path(filepath_or_buffer):
-    """Return the argument coerced to a string if it was a pathlib.Path
-    or a py.path.local
+    """Attempt to convert a path-like object to a string.
 
     Parameters
     ----------
     filepath_or_buffer : object to be converted
 
     Returns
     -------
-    str_filepath_or_buffer : a the string version of the input path
+    str_filepath_or_buffer : maybe a string version of the object
+
+    Notes
+    -----
+    Objects supporting the fspath protocol (python 3.6+) are coerced
+    according to its __fspath__ method.
+
+    For backwards compatibility with older pythons, pathlib.Path and
+    py.path objects are specially coerced.
+
+    Any other object is passed through unchanged, which includes bytes,
+    strings, buffers, or anything else that's not even path-like.
     """
+    if hasattr(filepath_or_buffer, '__fspath__'):
+        return filepath_or_buffer.__fspath__()
     if _PATHLIB_INSTALLED and isinstance(filepath_or_buffer, pathlib.Path):
         return text_type(filepath_or_buffer)
     if _PY_PATH_INSTALLED and isinstance(filepath_or_buffer, LocalPath):

@@ -180,10 +192,10 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
     -------
     a filepath_or_buffer, the encoding, the compression
     """
+    filepath_or_buffer = _stringify_path(filepath_or_buffer)
 
     if _is_url(filepath_or_buffer):
-        url = str(filepath_or_buffer)
-        req = _urlopen(url)
+        req = _urlopen(filepath_or_buffer)
         content_encoding = req.headers.get('Content-Encoding', None)
         if content_encoding == 'gzip':
             # Override compression based on Content-Encoding header

@@ -197,9 +209,6 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                                          encoding=encoding,
                                          compression=compression)
 
-    # Convert pathlib.Path/py.path.local or string
-    filepath_or_buffer = _stringify_path(filepath_or_buffer)
-
     if isinstance(filepath_or_buffer, (compat.string_types,
                                        compat.binary_type,
                                        mmap.mmap)):
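A small sketch of the coercion rules the new docstring describes (`_stringify_path` is a private helper, so this is for exposition only; the file name is illustrative):

from io import BytesIO
from pathlib import Path

from pandas.io.common import _stringify_path

_stringify_path(Path("data.csv"))  # -> 'data.csv' (__fspath__ on 3.6+, else the pathlib branch)
_stringify_path("data.csv")        # -> 'data.csv' (strings pass through unchanged)
_stringify_path(BytesIO(b"abc"))   # -> the buffer itself, returned untouched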

pandas/io/excel.py

+11 -7 lines

@@ -18,7 +18,8 @@
 from pandas.io.parsers import TextParser
 from pandas.errors import EmptyDataError
 from pandas.io.common import (_is_url, _urlopen, _validate_header_arg,
-                              get_filepath_or_buffer, _NA_VALUES)
+                              get_filepath_or_buffer, _NA_VALUES,
+                              _stringify_path)
 from pandas.core.indexes.period import Period
 import pandas._libs.json as json
 from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass,

@@ -233,7 +234,10 @@ def __init__(self, io, **kwds):
             raise ImportError("pandas requires xlrd >= 0.9.0 for excel "
                               "support, current version " + xlrd.__VERSION__)
 
+        # could be a str, ExcelFile, Book, etc.
         self.io = io
+        # Always a string
+        self._io = _stringify_path(io)
 
         engine = kwds.pop('engine', None)
 

@@ -242,19 +246,19 @@ def __init__(self, io, **kwds):
 
         # If io is a url, want to keep the data as bytes so can't pass
         # to get_filepath_or_buffer()
-        if _is_url(io):
-            io = _urlopen(io)
-        elif not isinstance(io, (ExcelFile, xlrd.Book)):
-            io, _, _ = get_filepath_or_buffer(io)
+        if _is_url(self._io):
+            io = _urlopen(self._io)
+        elif not isinstance(self.io, (ExcelFile, xlrd.Book)):
+            io, _, _ = get_filepath_or_buffer(self._io)
 
         if engine == 'xlrd' and isinstance(io, xlrd.Book):
             self.book = io
         elif not isinstance(io, xlrd.Book) and hasattr(io, "read"):
             # N.B. xlrd.Book has a read attribute too
             data = io.read()
             self.book = xlrd.open_workbook(file_contents=data)
-        elif isinstance(io, compat.string_types):
-            self.book = xlrd.open_workbook(io)
+        elif isinstance(self._io, compat.string_types):
+            self.book = xlrd.open_workbook(self._io)
         else:
             raise ValueError('Must explicitly set engine if not passing in'
                              ' buffer or path for io.')
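In effect, `ExcelFile` now keeps the original argument on `self.io` and a stringified copy on `self._io`, so a path-like object should work the same as a plain string (the file name below is a stand-in):

from pathlib import Path

import pandas as pd

# Either form should open the workbook from the stringified path.
xl = pd.ExcelFile(Path("report.xlsx"))
df = xl.parse()

df = pd.read_excel(Path("report.xlsx"))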

pandas/io/feather_format.py

+3 lines

@@ -3,6 +3,7 @@
 from distutils.version import LooseVersion
 from pandas import DataFrame, RangeIndex, Int64Index
 from pandas.compat import range
+from pandas.io.common import _stringify_path
 
 
 def _try_import():

@@ -43,6 +44,7 @@ def to_feather(df, path):
     path : string
         File path
     """
+    path = _stringify_path(path)
     if not isinstance(df, DataFrame):
         raise ValueError("feather only support IO with DataFrames")
 

@@ -99,4 +101,5 @@ def read_feather(path):
     """
 
     feather = _try_import()
+    path = _stringify_path(path)
     return feather.read_dataframe(path)

pandas/io/formats/excel.py

+3 lines

@@ -617,6 +617,9 @@ def write(self, writer, sheet_name='Sheet1', startrow=0,
             and ``io.excel.xlsm.writer``.
         """
         from pandas.io.excel import ExcelWriter
+        from pandas.io.common import _stringify_path
+
+        writer = _stringify_path(writer)
         need_save = False
         if isinstance(writer, string_types):
             writer = ExcelWriter(writer, engine=engine)

pandas/io/formats/format.py

+4 -1 lines

@@ -369,7 +369,10 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
                  index_names=True, line_width=None, max_rows=None,
                  max_cols=None, show_dimensions=False, decimal='.', **kwds):
         self.frame = frame
-        self.buf = _expand_user(buf) if buf is not None else StringIO()
+        if buf is not None:
+            self.buf = _expand_user(_stringify_path(buf))
+        else:
+            self.buf = StringIO()
         self.show_index_names = index_names
 
         if sparsify is None:

pandas/io/json/json.py

+3 -1 lines

@@ -7,7 +7,8 @@
 from pandas.compat import StringIO, long, u
 from pandas import compat, isnull
 from pandas import Series, DataFrame, to_datetime, MultiIndex
-from pandas.io.common import get_filepath_or_buffer, _get_handle
+from pandas.io.common import (get_filepath_or_buffer, _get_handle,
+                              _stringify_path)
 from pandas.core.common import AbstractMethodError
 from pandas.io.formats.printing import pprint_thing
 from .normalize import _convert_to_line_delimits

@@ -25,6 +26,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
             double_precision=10, force_ascii=True, date_unit='ms',
             default_handler=None, lines=False):
 
+    path_or_buf = _stringify_path(path_or_buf)
     if lines and orient != 'records':
         raise ValueError(
             "'lines' keyword only valid when 'orient' is records")

pandas/io/packers.py

+2 -1 lines

@@ -61,7 +61,7 @@
 from pandas.core.sparse.array import BlockIndex, IntIndex
 from pandas.core.generic import NDFrame
 from pandas.errors import PerformanceWarning
-from pandas.io.common import get_filepath_or_buffer
+from pandas.io.common import get_filepath_or_buffer, _stringify_path
 from pandas.core.internals import BlockManager, make_block, _safe_reshape
 import pandas.core.internals as internals
 

@@ -149,6 +149,7 @@ def writer(fh):
         for a in args:
             fh.write(pack(a, **kwargs))
 
+    path_or_buf = _stringify_path(path_or_buf)
    if isinstance(path_or_buf, compat.string_types):
        with open(path_or_buf, mode) as fh:
            writer(fh)

pandas/io/pickle.py

+3 -2 lines

@@ -4,7 +4,7 @@
 from numpy.lib.format import read_array, write_array
 from pandas.compat import BytesIO, cPickle as pkl, pickle_compat as pc, PY3
 from pandas.core.dtypes.common import is_datetime64_dtype, _NS_DTYPE
-from pandas.io.common import _get_handle, _infer_compression
+from pandas.io.common import _get_handle, _infer_compression, _stringify_path
 
 
 def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL):

@@ -34,6 +34,7 @@ def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL):
 
 
     """
+    path = _stringify_path(path)
    inferred_compression = _infer_compression(path, compression)
    f, fh = _get_handle(path, 'wb',
                        compression=inferred_compression,

@@ -71,7 +72,7 @@ def read_pickle(path, compression='infer'):
     -------
     unpickled : type of object stored in file
     """
-
+    path = _stringify_path(path)
     inferred_compression = _infer_compression(path, compression)
 
     def read_wrapper(func):
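Because the path is stringified before `_infer_compression`, extension-based compression inference should also work for path-like inputs; a sketch with a made-up file name:

from pathlib import Path

import pandas as pd

df = pd.DataFrame({"A": range(3)})

# The '.gz' suffix is detected on the stringified path, so the default
# compression='infer' should pick gzip for both the write and the read.
df.to_pickle(Path("frame.pkl.gz"))
same = pd.read_pickle(Path("frame.pkl.gz"))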

pandas/io/pytables.py

+11 -14 lines

@@ -309,9 +309,17 @@ def read_hdf(path_or_buf, key=None, **kwargs):
     if 'where' in kwargs:
         kwargs['where'] = _ensure_term(kwargs['where'], scope_level=1)
 
-    path_or_buf = _stringify_path(path_or_buf)
-    if isinstance(path_or_buf, string_types):
+    if isinstance(path_or_buf, HDFStore):
+        if not path_or_buf.is_open:
+            raise IOError('The HDFStore must be open for reading.')
 
+        store = path_or_buf
+        auto_close = False
+    else:
+        path_or_buf = _stringify_path(path_or_buf)
+        if not isinstance(path_or_buf, string_types):
+            raise NotImplementedError('Support for generic buffers has not '
+                                      'been implemented.')
        try:
            exists = os.path.exists(path_or_buf)
 

@@ -323,22 +331,11 @@ def read_hdf(path_or_buf, key=None, **kwargs):
            raise compat.FileNotFoundError(
                'File %s does not exist' % path_or_buf)
 
+        store = HDFStore(path_or_buf, **kwargs)
        # can't auto open/close if we are using an iterator
        # so delegate to the iterator
-        store = HDFStore(path_or_buf, **kwargs)
        auto_close = True
 
-    elif isinstance(path_or_buf, HDFStore):
-        if not path_or_buf.is_open:
-            raise IOError('The HDFStore must be open for reading.')
-
-        store = path_or_buf
-        auto_close = False
-
-    else:
-        raise NotImplementedError('Support for generic buffers has not been '
-                                  'implemented.')
-
    try:
        if key is None:
            groups = store.groups()
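The reordered branches keep the previous behaviour: an already-open `HDFStore` is used directly and not auto-closed, while anything else is stringified and must end up a plain path; the names below are illustrative:

from pathlib import Path

import pandas as pd

# Path-like input: stringified, then opened (and closed) by read_hdf itself.
df = pd.read_hdf(Path("store.h5"), "df")

# Open store: used as-is; read_hdf will not close it.
with pd.HDFStore("store.h5") as store:
    df = pd.read_hdf(store, "df")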

pandas/io/sas/sasreader.py

+2 lines

@@ -2,6 +2,7 @@
 Read SAS sas7bdat or xport files.
 """
 from pandas import compat
+from pandas.io.common import _stringify_path
 
 
 def read_sas(filepath_or_buffer, format=None, index=None, encoding=None,

@@ -34,6 +35,7 @@ def read_sas(filepath_or_buffer, format=None, index=None, encoding=None,
        buffer_error_msg = ("If this is a buffer object rather "
                            "than a string name, you must specify "
                            "a format string")
+        filepath_or_buffer = _stringify_path(filepath_or_buffer)
        if not isinstance(filepath_or_buffer, compat.string_types):
            raise ValueError(buffer_error_msg)
        try:

pandas/io/stata.py

+4 -2 lines

@@ -30,7 +30,8 @@
 from pandas.util._decorators import Appender
 import pandas as pd
 
-from pandas.io.common import get_filepath_or_buffer, BaseIterator
+from pandas.io.common import (get_filepath_or_buffer, BaseIterator,
+                              _stringify_path)
 from pandas._libs.lib import max_len_string_array, infer_dtype
 from pandas._libs.tslib import NaT, Timestamp
 

@@ -976,6 +977,7 @@ def __init__(self, path_or_buf, convert_dates=True,
         self._lines_read = 0
 
         self._native_byteorder = _set_endianness(sys.byteorder)
+        path_or_buf = _stringify_path(path_or_buf)
         if isinstance(path_or_buf, str):
             path_or_buf, encoding, _ = get_filepath_or_buffer(
                 path_or_buf, encoding=self._default_encoding

@@ -1930,7 +1932,7 @@ def __init__(self, fname, data, convert_dates=None, write_index=True,
         if byteorder is None:
             byteorder = sys.byteorder
         self._byteorder = _set_endianness(byteorder)
-        self._fname = fname
+        self._fname = _stringify_path(fname)
         self.type_converters = {253: np.int32, 252: np.int16, 251: np.int8}
 
     def _write(self, to_write):
[file name not captured]

672 Bytes (binary file not shown)

[file name not captured]

@@ -0,0 +1,3 @@
+A B C
+1 2 3
+4 5 6

pandas/tests/io/msgpack/data/frame.mp

309 Bytes (binary file not shown)

pandas/tests/io/sas/test_sas7bdat.py

-3 lines

@@ -3,7 +3,6 @@
 import pandas.util.testing as tm
 import os
 import io
-import pytest
 import numpy as np
 
 

@@ -66,7 +65,6 @@ def test_from_iterator(self):
         tm.assert_frame_equal(df, df0.iloc[2:5, :])
         rdr.close()
 
-    @pytest.mark.xfail(reason="read_sas currently doesn't work with pathlib")
     def test_path_pathlib(self):
         tm._skip_if_no_pathlib()
         from pathlib import Path

@@ -77,7 +75,6 @@ def test_path_pathlib(self):
         df = pd.read_sas(fname, encoding='utf-8')
         tm.assert_frame_equal(df, df0)
 
-    @pytest.mark.xfail(reason="read_sas currently doesn't work with localpath")
     def test_path_localpath(self):
         tm._skip_if_no_localpath()
         from py.path import local as LocalPath
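The dropped `xfail` markers correspond to tests along these lines, which should now pass since `read_sas` stringifies its argument (the file name stands in for the test fixture):

from pathlib import Path

import pandas as pd

fname = Path("test1.sas7bdat")
df = pd.read_sas(fname, encoding="utf-8")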
