Skip to content

ENH: Support fspath protocol #16301

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 18, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ Check the :ref:`API Changes <whatsnew_0210.api_breaking>` and :ref:`deprecations
New features
~~~~~~~~~~~~

- Support for `PEP 519 -- Adding a file system path protocol
<https://www.python.org/dev/peps/pep-0519/>`_ on most readers and writers (:issue:`13823`)
- Added ``__fspath__`` method to :class:`pandas.HDFStore`, :class:`pandas.ExcelFile`,
and :class:`pandas.ExcelWriter` to work properly with the file system path protocol (:issue:`13823`)


.. _whatsnew_0210.enhancements.other:
Expand All @@ -40,6 +44,8 @@ Backwards incompatible API changes
- Support has been dropped for Python 3.4 (:issue:`15251`)
- The Categorical constructor no longer accepts a scalar for the ``categories`` keyword. (:issue:`16022`)

- Accessing a non-existent attribute on a closed :class:`HDFStore` will now
raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`)

.. _whatsnew_0210.api:

Expand Down
25 changes: 17 additions & 8 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,17 +146,29 @@ def _validate_header_arg(header):


def _stringify_path(filepath_or_buffer):
"""Return the argument coerced to a string if it was a pathlib.Path
or a py.path.local
"""Attempt to convert a path-like object to a string.

Parameters
----------
filepath_or_buffer : object to be converted

Returns
-------
str_filepath_or_buffer : a the string version of the input path
str_filepath_or_buffer : the object converted to a string if possible, otherwise the object unchanged

Notes
-----
Objects supporting the fspath protocol (python 3.6+) are coerced
according to their __fspath__ method.

For backwards compatibility with older pythons, pathlib.Path and
py.path objects are specially coerced.

Any other object is passed through unchanged, which includes bytes,
strings, buffers, or anything else that's not even path-like.
"""
if hasattr(filepath_or_buffer, '__fspath__'):
return filepath_or_buffer.__fspath__()
if _PATHLIB_INSTALLED and isinstance(filepath_or_buffer, pathlib.Path):
return text_type(filepath_or_buffer)
if _PY_PATH_INSTALLED and isinstance(filepath_or_buffer, LocalPath):
Expand All @@ -180,10 +192,10 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
-------
a filepath_or_buffer, the encoding, the compression
"""
filepath_or_buffer = _stringify_path(filepath_or_buffer)

if _is_url(filepath_or_buffer):
url = str(filepath_or_buffer)
req = _urlopen(url)
req = _urlopen(filepath_or_buffer)
content_encoding = req.headers.get('Content-Encoding', None)
if content_encoding == 'gzip':
# Override compression based on Content-Encoding header
Expand All @@ -197,9 +209,6 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
encoding=encoding,
compression=compression)

# Convert pathlib.Path/py.path.local or string
filepath_or_buffer = _stringify_path(filepath_or_buffer)

if isinstance(filepath_or_buffer, (compat.string_types,
compat.binary_type,
mmap.mmap)):
Expand Down
24 changes: 17 additions & 7 deletions pandas/io/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
from pandas.io.parsers import TextParser
from pandas.errors import EmptyDataError
from pandas.io.common import (_is_url, _urlopen, _validate_header_arg,
get_filepath_or_buffer, _NA_VALUES)
get_filepath_or_buffer, _NA_VALUES,
_stringify_path)
from pandas.core.indexes.period import Period
import pandas._libs.json as json
from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass,
Expand Down Expand Up @@ -233,7 +234,10 @@ def __init__(self, io, **kwds):
raise ImportError("pandas requires xlrd >= 0.9.0 for excel "
"support, current version " + xlrd.__VERSION__)

# could be a str, ExcelFile, Book, etc.
self.io = io
# Always a string
self._io = _stringify_path(io)

engine = kwds.pop('engine', None)

Expand All @@ -242,23 +246,26 @@ def __init__(self, io, **kwds):

# If io is a url, want to keep the data as bytes so can't pass
# to get_filepath_or_buffer()
if _is_url(io):
io = _urlopen(io)
elif not isinstance(io, (ExcelFile, xlrd.Book)):
io, _, _ = get_filepath_or_buffer(io)
if _is_url(self._io):
io = _urlopen(self._io)
elif not isinstance(self.io, (ExcelFile, xlrd.Book)):
io, _, _ = get_filepath_or_buffer(self._io)

if engine == 'xlrd' and isinstance(io, xlrd.Book):
self.book = io
elif not isinstance(io, xlrd.Book) and hasattr(io, "read"):
# N.B. xlrd.Book has a read attribute too
data = io.read()
self.book = xlrd.open_workbook(file_contents=data)
elif isinstance(io, compat.string_types):
self.book = xlrd.open_workbook(io)
elif isinstance(self._io, compat.string_types):
self.book = xlrd.open_workbook(self._io)
else:
raise ValueError('Must explicitly set engine if not passing in'
' buffer or path for io.')

def __fspath__(self):
return self._io

def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
names=None, index_col=None, parse_cols=None, parse_dates=False,
date_parser=None, na_values=None, thousands=None,
Expand Down Expand Up @@ -754,6 +761,9 @@ def __init__(self, path, engine=None,
else:
self.datetime_format = datetime_format

def __fspath__(self):
return _stringify_path(self.path)

def _get_sheet_name(self, sheet_name):
if sheet_name is None:
sheet_name = self.cur_sheet
Expand Down
3 changes: 3 additions & 0 deletions pandas/io/feather_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from distutils.version import LooseVersion
from pandas import DataFrame, RangeIndex, Int64Index
from pandas.compat import range
from pandas.io.common import _stringify_path


def _try_import():
Expand Down Expand Up @@ -43,6 +44,7 @@ def to_feather(df, path):
path : string
File path
"""
path = _stringify_path(path)
if not isinstance(df, DataFrame):
raise ValueError("feather only support IO with DataFrames")

Expand Down Expand Up @@ -99,4 +101,5 @@ def read_feather(path):
"""

feather = _try_import()
path = _stringify_path(path)
return feather.read_dataframe(path)
11 changes: 7 additions & 4 deletions pandas/io/formats/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import numpy as np

from pandas.compat import reduce, string_types
from pandas.compat import reduce
from pandas.io.formats.css import CSSResolver, CSSWarning
from pandas.io.formats.printing import pprint_thing
from pandas.core.dtypes.common import is_float
Expand Down Expand Up @@ -617,9 +617,12 @@ def write(self, writer, sheet_name='Sheet1', startrow=0,
and ``io.excel.xlsm.writer``.
"""
from pandas.io.excel import ExcelWriter
need_save = False
if isinstance(writer, string_types):
writer = ExcelWriter(writer, engine=engine)
from pandas.io.common import _stringify_path

if isinstance(writer, ExcelWriter):
need_save = False
else:
writer = ExcelWriter(_stringify_path(writer), engine=engine)
need_save = True

formatted_cells = self.get_formatted_cells()
Expand Down
5 changes: 4 additions & 1 deletion pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,10 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
index_names=True, line_width=None, max_rows=None,
max_cols=None, show_dimensions=False, decimal='.', **kwds):
self.frame = frame
self.buf = _expand_user(buf) if buf is not None else StringIO()
if buf is not None:
self.buf = _expand_user(_stringify_path(buf))
else:
self.buf = StringIO()
self.show_index_names = index_names

if sparsify is None:
Expand Down
4 changes: 3 additions & 1 deletion pandas/io/json/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from pandas.compat import StringIO, long, u
from pandas import compat, isnull
from pandas import Series, DataFrame, to_datetime, MultiIndex
from pandas.io.common import get_filepath_or_buffer, _get_handle
from pandas.io.common import (get_filepath_or_buffer, _get_handle,
_stringify_path)
from pandas.core.common import AbstractMethodError
from pandas.io.formats.printing import pprint_thing
from .normalize import _convert_to_line_delimits
Expand All @@ -25,6 +26,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
double_precision=10, force_ascii=True, date_unit='ms',
default_handler=None, lines=False):

path_or_buf = _stringify_path(path_or_buf)
if lines and orient != 'records':
raise ValueError(
"'lines' keyword only valid when 'orient' is records")
Expand Down
3 changes: 2 additions & 1 deletion pandas/io/packers.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
from pandas.core.sparse.array import BlockIndex, IntIndex
from pandas.core.generic import NDFrame
from pandas.errors import PerformanceWarning
from pandas.io.common import get_filepath_or_buffer
from pandas.io.common import get_filepath_or_buffer, _stringify_path
from pandas.core.internals import BlockManager, make_block, _safe_reshape
import pandas.core.internals as internals

Expand Down Expand Up @@ -149,6 +149,7 @@ def writer(fh):
for a in args:
fh.write(pack(a, **kwargs))

path_or_buf = _stringify_path(path_or_buf)
if isinstance(path_or_buf, compat.string_types):
with open(path_or_buf, mode) as fh:
writer(fh)
Expand Down
5 changes: 3 additions & 2 deletions pandas/io/pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from numpy.lib.format import read_array, write_array
from pandas.compat import BytesIO, cPickle as pkl, pickle_compat as pc, PY3
from pandas.core.dtypes.common import is_datetime64_dtype, _NS_DTYPE
from pandas.io.common import _get_handle, _infer_compression
from pandas.io.common import _get_handle, _infer_compression, _stringify_path


def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL):
Expand Down Expand Up @@ -34,6 +34,7 @@ def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL):


"""
path = _stringify_path(path)
inferred_compression = _infer_compression(path, compression)
f, fh = _get_handle(path, 'wb',
compression=inferred_compression,
Expand Down Expand Up @@ -71,7 +72,7 @@ def read_pickle(path, compression='infer'):
-------
unpickled : type of object stored in file
"""

path = _stringify_path(path)
inferred_compression = _infer_compression(path, compression)

def read_wrapper(func):
Expand Down
31 changes: 15 additions & 16 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,9 +309,17 @@ def read_hdf(path_or_buf, key=None, **kwargs):
if 'where' in kwargs:
kwargs['where'] = _ensure_term(kwargs['where'], scope_level=1)

path_or_buf = _stringify_path(path_or_buf)
if isinstance(path_or_buf, string_types):
if isinstance(path_or_buf, HDFStore):
if not path_or_buf.is_open:
raise IOError('The HDFStore must be open for reading.')

store = path_or_buf
auto_close = False
else:
path_or_buf = _stringify_path(path_or_buf)
if not isinstance(path_or_buf, string_types):
raise NotImplementedError('Support for generic buffers has not '
'been implemented.')
try:
exists = os.path.exists(path_or_buf)

Expand All @@ -323,22 +331,11 @@ def read_hdf(path_or_buf, key=None, **kwargs):
raise compat.FileNotFoundError(
'File %s does not exist' % path_or_buf)

store = HDFStore(path_or_buf, **kwargs)
# can't auto open/close if we are using an iterator
# so delegate to the iterator
store = HDFStore(path_or_buf, **kwargs)
auto_close = True

elif isinstance(path_or_buf, HDFStore):
if not path_or_buf.is_open:
raise IOError('The HDFStore must be open for reading.')

store = path_or_buf
auto_close = False

else:
raise NotImplementedError('Support for generic buffers has not been '
'implemented.')

try:
if key is None:
groups = store.groups()
Expand Down Expand Up @@ -440,7 +437,7 @@ def __init__(self, path, mode=None, complevel=None, complib=None,
"complib only supports {libs} compression.".format(
libs=tables.filters.all_complibs))

self._path = path
self._path = _stringify_path(path)
if mode is None:
mode = 'a'
self._mode = mode
Expand All @@ -451,6 +448,9 @@ def __init__(self, path, mode=None, complevel=None, complib=None,
self._filters = None
self.open(mode=mode, **kwargs)

def __fspath__(self):
return self._path

@property
def root(self):
""" return the root node """
Expand All @@ -472,7 +472,6 @@ def __delitem__(self, key):

def __getattr__(self, name):
""" allow attribute access to get stores """
self._check_if_open()
try:
return self.get(name)
except:
Expand Down
2 changes: 2 additions & 0 deletions pandas/io/sas/sasreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Read SAS sas7bdat or xport files.
"""
from pandas import compat
from pandas.io.common import _stringify_path


def read_sas(filepath_or_buffer, format=None, index=None, encoding=None,
Expand Down Expand Up @@ -34,6 +35,7 @@ def read_sas(filepath_or_buffer, format=None, index=None, encoding=None,
buffer_error_msg = ("If this is a buffer object rather "
"than a string name, you must specify "
"a format string")
filepath_or_buffer = _stringify_path(filepath_or_buffer)
if not isinstance(filepath_or_buffer, compat.string_types):
raise ValueError(buffer_error_msg)
try:
Expand Down
6 changes: 4 additions & 2 deletions pandas/io/stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
from pandas.util._decorators import Appender
import pandas as pd

from pandas.io.common import get_filepath_or_buffer, BaseIterator
from pandas.io.common import (get_filepath_or_buffer, BaseIterator,
_stringify_path)
from pandas._libs.lib import max_len_string_array, infer_dtype
from pandas._libs.tslib import NaT, Timestamp

Expand Down Expand Up @@ -976,6 +977,7 @@ def __init__(self, path_or_buf, convert_dates=True,
self._lines_read = 0

self._native_byteorder = _set_endianness(sys.byteorder)
path_or_buf = _stringify_path(path_or_buf)
if isinstance(path_or_buf, str):
path_or_buf, encoding, _ = get_filepath_or_buffer(
path_or_buf, encoding=self._default_encoding
Expand Down Expand Up @@ -1930,7 +1932,7 @@ def __init__(self, fname, data, convert_dates=None, write_index=True,
if byteorder is None:
byteorder = sys.byteorder
self._byteorder = _set_endianness(byteorder)
self._fname = fname
self._fname = _stringify_path(fname)
self.type_converters = {253: np.int32, 252: np.int16, 251: np.int8}

def _write(self, to_write):
Expand Down
Binary file added pandas/tests/io/data/feather-0_3_1.feather
Binary file not shown.
3 changes: 3 additions & 0 deletions pandas/tests/io/data/fixed_width_format.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
A B C
1 2 3
4 5 6
Binary file added pandas/tests/io/msgpack/data/frame.mp
Binary file not shown.
Loading