Skip to content

Commit 21a9932

Browse files
committed
ENH: Support fspath protocol
ENH: handle __fspath__ in writers
1 parent 1e59b4c commit 21a9932

14 files changed

+127
-26
lines changed

pandas/io/common.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,8 @@ def _stringify_path(filepath_or_buffer):
157157
-------
158158
str_filepath_or_buffer : the string version of the input path
159159
"""
160+
if hasattr(filepath_or_buffer, '__fspath__'):
161+
return filepath_or_buffer.__fspath__()
160162
if _PATHLIB_INSTALLED and isinstance(filepath_or_buffer, pathlib.Path):
161163
return text_type(filepath_or_buffer)
162164
if _PY_PATH_INSTALLED and isinstance(filepath_or_buffer, LocalPath):
@@ -180,10 +182,10 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
180182
-------
181183
a filepath_or_buffer, the encoding, the compression
182184
"""
185+
filepath_or_buffer = _stringify_path(filepath_or_buffer)
183186

184187
if _is_url(filepath_or_buffer):
185-
url = str(filepath_or_buffer)
186-
req = _urlopen(url)
188+
req = _urlopen(filepath_or_buffer)
187189
content_encoding = req.headers.get('Content-Encoding', None)
188190
if content_encoding == 'gzip':
189191
# Override compression based on Content-Encoding header
@@ -197,9 +199,6 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
197199
encoding=encoding,
198200
compression=compression)
199201

200-
# Convert pathlib.Path/py.path.local or string
201-
filepath_or_buffer = _stringify_path(filepath_or_buffer)
202-
203202
if isinstance(filepath_or_buffer, (compat.string_types,
204203
compat.binary_type,
205204
mmap.mmap)):

pandas/io/excel.py

+14-13
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
from pandas.io.parsers import TextParser
1919
from pandas.errors import EmptyDataError
2020
from pandas.io.common import (_is_url, _urlopen, _validate_header_arg,
21-
get_filepath_or_buffer, _NA_VALUES)
21+
get_filepath_or_buffer, _NA_VALUES,
22+
_stringify_path)
2223
from pandas.core.indexes.period import Period
2324
import pandas._libs.json as json
2425
from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass,
@@ -233,7 +234,7 @@ def __init__(self, io, **kwds):
233234
raise ImportError("pandas requires xlrd >= 0.9.0 for excel "
234235
"support, current version " + xlrd.__VERSION__)
235236

236-
self.io = io
237+
self.io = _stringify_path(io)
237238

238239
engine = kwds.pop('engine', None)
239240

@@ -242,19 +243,19 @@ def __init__(self, io, **kwds):
242243

243244
# If io is a url, want to keep the data as bytes so can't pass
244245
# to get_filepath_or_buffer()
245-
if _is_url(io):
246-
io = _urlopen(io)
247-
elif not isinstance(io, (ExcelFile, xlrd.Book)):
248-
io, _, _ = get_filepath_or_buffer(io)
249-
250-
if engine == 'xlrd' and isinstance(io, xlrd.Book):
251-
self.book = io
252-
elif not isinstance(io, xlrd.Book) and hasattr(io, "read"):
246+
if _is_url(self.io):
247+
self.io = _urlopen(self.io)
248+
elif not isinstance(self.io, (ExcelFile, xlrd.Book)):
249+
self.io, _, _ = get_filepath_or_buffer(self.io)
250+
251+
if engine == 'xlrd' and isinstance(self.io, xlrd.Book):
252+
self.book = self.io
253+
elif not isinstance(self.io, xlrd.Book) and hasattr(self.io, "read"):
253254
# N.B. xlrd.Book has a read attribute too
254-
data = io.read()
255+
data = self.io.read()
255256
self.book = xlrd.open_workbook(file_contents=data)
256-
elif isinstance(io, compat.string_types):
257-
self.book = xlrd.open_workbook(io)
257+
elif isinstance(self.io, compat.string_types):
258+
self.book = xlrd.open_workbook(self.io)
258259
else:
259260
raise ValueError('Must explicitly set engine if not passing in'
260261
' buffer or path for io.')

pandas/io/feather_format.py

+3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from distutils.version import LooseVersion
44
from pandas import DataFrame, RangeIndex, Int64Index
55
from pandas.compat import range
6+
from pandas.io.common import _stringify_path
67

78

89
def _try_import():
@@ -43,6 +44,7 @@ def to_feather(df, path):
4344
path : string
4445
File path
4546
"""
47+
path = _stringify_path(path)
4648
if not isinstance(df, DataFrame):
4749
raise ValueError("feather only support IO with DataFrames")
4850

@@ -99,4 +101,5 @@ def read_feather(path):
99101
"""
100102

101103
feather = _try_import()
104+
path = _stringify_path(path)
102105
return feather.read_dataframe(path)

pandas/io/formats/excel.py

+3
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,9 @@ def write(self, writer, sheet_name='Sheet1', startrow=0,
617617
and ``io.excel.xlsm.writer``.
618618
"""
619619
from pandas.io.excel import ExcelWriter
620+
from pandas.io.common import _stringify_path
621+
622+
writer = _stringify_path(writer)
620623
need_save = False
621624
if isinstance(writer, string_types):
622625
writer = ExcelWriter(writer, engine=engine)

pandas/io/formats/format.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@
3232
OrderedDict, unichr)
3333
from pandas.io.formats.terminal import get_terminal_size
3434
from pandas.core.config import get_option, set_option
35-
from pandas.io.common import _get_handle, UnicodeWriter, _expand_user
35+
from pandas.io.common import (_get_handle, UnicodeWriter, _expand_user,
36+
_stringify_path)
3637
from pandas.io.formats.printing import adjoin, justify, pprint_thing
3738
from pandas.io.formats.common import get_level_lengths
3839
import pandas.core.common as com
@@ -368,7 +369,10 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
368369
index_names=True, line_width=None, max_rows=None,
369370
max_cols=None, show_dimensions=False, decimal='.', **kwds):
370371
self.frame = frame
371-
self.buf = _expand_user(buf) if buf is not None else StringIO()
372+
if buf is not None:
373+
self.buf = _expand_user(_stringify_path(buf))
374+
else:
375+
self.buf = StringIO()
372376
self.show_index_names = index_names
373377

374378
if sparsify is None:
@@ -1475,7 +1479,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
14751479
if path_or_buf is None:
14761480
path_or_buf = StringIO()
14771481

1478-
self.path_or_buf = _expand_user(path_or_buf)
1482+
self.path_or_buf = _expand_user(_stringify_path(path_or_buf))
14791483
self.sep = sep
14801484
self.na_rep = na_rep
14811485
self.float_format = float_format

pandas/io/json/json.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
from pandas.compat import StringIO, long, u
88
from pandas import compat, isnull
99
from pandas import Series, DataFrame, to_datetime, MultiIndex
10-
from pandas.io.common import get_filepath_or_buffer, _get_handle
10+
from pandas.io.common import (get_filepath_or_buffer, _get_handle,
11+
_stringify_path)
1112
from pandas.core.common import AbstractMethodError
1213
from pandas.io.formats.printing import pprint_thing
1314
from .normalize import _convert_to_line_delimits
@@ -25,6 +26,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
2526
double_precision=10, force_ascii=True, date_unit='ms',
2627
default_handler=None, lines=False):
2728

29+
path_or_buf = _stringify_path(path_or_buf)
2830
if lines and orient != 'records':
2931
raise ValueError(
3032
"'lines' keyword only valid when 'orient' is records")

pandas/io/packers.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
from pandas.core.sparse.array import BlockIndex, IntIndex
6262
from pandas.core.generic import NDFrame
6363
from pandas.errors import PerformanceWarning
64-
from pandas.io.common import get_filepath_or_buffer
64+
from pandas.io.common import get_filepath_or_buffer, _stringify_path
6565
from pandas.core.internals import BlockManager, make_block, _safe_reshape
6666
import pandas.core.internals as internals
6767

@@ -149,6 +149,7 @@ def writer(fh):
149149
for a in args:
150150
fh.write(pack(a, **kwargs))
151151

152+
path_or_buf = _stringify_path(path_or_buf)
152153
if isinstance(path_or_buf, compat.string_types):
153154
with open(path_or_buf, mode) as fh:
154155
writer(fh)

pandas/io/pickle.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from numpy.lib.format import read_array, write_array
55
from pandas.compat import BytesIO, cPickle as pkl, pickle_compat as pc, PY3
66
from pandas.core.dtypes.common import is_datetime64_dtype, _NS_DTYPE
7-
from pandas.io.common import _get_handle, _infer_compression
7+
from pandas.io.common import _get_handle, _infer_compression, _stringify_path
88

99

1010
def to_pickle(obj, path, compression='infer'):
@@ -21,6 +21,7 @@ def to_pickle(obj, path, compression='infer'):
2121
2222
.. versionadded:: 0.20.0
2323
"""
24+
path = _stringify_path(path)
2425
inferred_compression = _infer_compression(path, compression)
2526
f, fh = _get_handle(path, 'wb',
2627
compression=inferred_compression,
@@ -56,7 +57,7 @@ def read_pickle(path, compression='infer'):
5657
-------
5758
unpickled : type of object stored in file
5859
"""
59-
60+
path = _stringify_path(path)
6061
inferred_compression = _infer_compression(path, compression)
6162

6263
def read_wrapper(func):

pandas/io/sas/sasreader.py

+2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
Read SAS sas7bdat or xport files.
33
"""
44
from pandas import compat
5+
from pandas.io.common import _stringify_path
56

67

78
def read_sas(filepath_or_buffer, format=None, index=None, encoding=None,
@@ -34,6 +35,7 @@ def read_sas(filepath_or_buffer, format=None, index=None, encoding=None,
3435
buffer_error_msg = ("If this is a buffer object rather "
3536
"than a string name, you must specify "
3637
"a format string")
38+
filepath_or_buffer = _stringify_path(filepath_or_buffer)
3739
if not isinstance(filepath_or_buffer, compat.string_types):
3840
raise ValueError(buffer_error_msg)
3941
try:

pandas/io/stata.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@
3030
from pandas.util._decorators import Appender
3131
import pandas as pd
3232

33-
from pandas.io.common import get_filepath_or_buffer, BaseIterator
33+
from pandas.io.common import (get_filepath_or_buffer, BaseIterator,
34+
_stringify_path)
3435
from pandas._libs.lib import max_len_string_array, infer_dtype
3536
from pandas._libs.tslib import NaT, Timestamp
3637

@@ -976,6 +977,7 @@ def __init__(self, path_or_buf, convert_dates=True,
976977
self._lines_read = 0
977978

978979
self._native_byteorder = _set_endianness(sys.byteorder)
980+
path_or_buf = _stringify_path(path_or_buf)
979981
if isinstance(path_or_buf, str):
980982
path_or_buf, encoding, _ = get_filepath_or_buffer(
981983
path_or_buf, encoding=self._default_encoding
672 Bytes
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
A B C
2+
1 2 3
3+
4 5 6

pandas/tests/io/msgpack/data/frame.mp

309 Bytes
Binary file not shown.

pandas/tests/io/test_common.py

+80
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import os
77
from os.path import isabs
88

9+
import pandas as pd
910
import pandas.util.testing as tm
1011

1112
from pandas.io import common
@@ -24,6 +25,18 @@
2425
pass
2526

2627

28+
class CustomFSPath(object):
    """Minimal path-like object used to exercise ``__fspath__`` handling.

    The class is deliberately unrelated to ``pathlib.Path`` and
    ``py.path.local`` so that the only thing identifying it as a path is
    the ``__fspath__`` method.
    """

    def __init__(self, path):
        # Keep the raw value; it is handed back unchanged by __fspath__.
        self.path = path

    def __fspath__(self):
        """Return the path this object was constructed with."""
        return self.path
35+
36+
37+
HERE = os.path.dirname(__file__)
38+
39+
2740
class TestCommonIOCapabilities(object):
2841
data1 = """index,A,B,C,D
2942
foo,2,3,4,5
@@ -65,6 +78,11 @@ def test_stringify_path_localpath(self):
6578
lpath = LocalPath(path)
6679
assert common._stringify_path(lpath) == abs_path
6780

81+
def test_stringify_path_fspath(self):
    """``_stringify_path`` returns what an object's ``__fspath__`` gives."""
    raw = 'foo/bar.csv'
    wrapped = CustomFSPath(raw)
    assert common._stringify_path(wrapped) == raw
85+
6886
def test_get_filepath_or_buffer_with_path(self):
6987
filename = '~/sometest'
7088
filepath_or_buffer, _, _ = common.get_filepath_or_buffer(filename)
@@ -89,6 +107,68 @@ def test_iterator(self):
89107
tm.assert_frame_equal(first, expected.iloc[[0]])
90108
tm.assert_frame_equal(concat(it), expected.iloc[1:])
91109

110+
@pytest.mark.parametrize('reader, module, path', [
    (pd.read_csv, 'os', os.path.join(HERE, 'data', 'iris.csv')),
    (pd.read_table, 'os', os.path.join(HERE, 'data', 'iris.csv')),
    (pd.read_fwf, 'os', os.path.join(HERE, 'data',
                                     'fixed_width_format.txt')),
    (pd.read_excel, 'xlrd', os.path.join(HERE, 'data', 'test1.xlsx')),
    (pd.read_feather, 'feather', os.path.join(HERE, 'data',
                                              'feather-0_3_1.feather')),
    (pd.read_hdf, 'tables', os.path.join(HERE, 'data', 'legacy_hdf',
                                         'datetimetz_object.h5')),
    (pd.read_stata, 'os', os.path.join(HERE, 'data', 'stata10_115.dta')),
    (pd.read_sas, 'os', os.path.join(HERE, 'sas', 'data',
                                     'test1.sas7bdat')),
    (pd.read_json, 'os', os.path.join(HERE, 'json', 'data',
                                      'tsframe_v012.json')),
    (pd.read_msgpack, 'os', os.path.join(HERE, 'msgpack', 'data',
                                         'frame.mp')),
    (pd.read_pickle, 'os', os.path.join(HERE, 'data',
                                        'categorical_0_14_1.pickle')),
])
def test_read_fspath_all(self, reader, module, path):
    """Every reader accepts an object implementing ``__fspath__``.

    Parameters
    ----------
    reader : callable
        The ``pd.read_*`` function under test.
    module : str
        Module that must be importable for this reader; the case is
        skipped otherwise ('os' is used when nothing optional is needed).
    path : str
        Plain string path of the fixture file to read.
    """
    pytest.importorskip(module)

    mypath = CustomFSPath(path)
    result = reader(mypath)
    # The baseline must come from the plain string path. Reading it
    # through ``mypath`` as well would compare the fspath code path with
    # itself and the assertion could never fail.
    expected = reader(path)
    if path.endswith('.pickle'):
        # categorical
        tm.assert_categorical_equal(result, expected)
    else:
        tm.assert_frame_equal(result, expected)
140+
141+
@pytest.mark.parametrize('writer_name, writer_kwargs, module', [
    ('to_csv', {}, 'os'),
    ('to_excel', {'engine': 'xlwt'}, 'xlwt'),
    ('to_feather', {}, 'feather'),
    ('to_hdf', {'key': 'bar', 'mode': 'w'}, 'tables'),
    ('to_html', {}, 'os'),
    ('to_json', {}, 'os'),
    ('to_latex', {}, 'os'),
    ('to_msgpack', {}, 'os'),
    ('to_pickle', {}, 'os'),
    ('to_stata', {}, 'os'),
])
def test_write_fspath_all(self, writer_name, writer_kwargs, module):
    """Writers emit identical bytes for a str path and an fspath object.

    The frame is written once to the plain string path and once through a
    ``CustomFSPath`` wrapping the same location; the raw file contents of
    the two writes are then compared.
    """
    def _slurp(filename):
        # Read the whole file back as bytes for an exact comparison.
        with open(filename, 'rb') as handle:
            return handle.read()

    with tm.ensure_clean('foo', return_filelike=False) as path:
        pytest.importorskip(module)
        frame = pd.DataFrame({"A": [1, 2]})
        write = getattr(frame, writer_name)
        fspath_obj = CustomFSPath(path)

        # Baseline: write via the plain string path.
        write(path, **writer_kwargs)
        expected = _slurp(path)

        # Remove the baseline so the second write starts from scratch.
        os.remove(path)

        write(fspath_obj, **writer_kwargs)
        result = _slurp(path)

        assert result == expected
171+
92172

93173
class TestMMapWrapper(object):
94174

0 commit comments

Comments
 (0)