Skip to content

Commit 6f9e907

Browse files
chris-b1 authored and TomAugspurger committed
BUG: pathlib.Path in io (pandas-dev#16292)
* BUG: pathlib.Path in io * CLN: factor out pathlib roundtrip * add localpath tests for other io * fixup * xfail SAS; type in parser * missing import * xfail for pandas-dev#14704 * fix to_csv * lint * lint cleanup * add feather (xfail) (cherry picked from commit 4cd8458)
1 parent c17a3e9 commit 6f9e907

File tree

12 files changed

+204
-2
lines changed

12 files changed

+204
-2
lines changed

doc/source/whatsnew/v0.20.2.txt

+2
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ Performance Improvements
3333
Bug Fixes
3434
~~~~~~~~~
3535

36+
- Bug in using ``pathlib.Path`` or ``py.path.local`` objects with io functions (:issue:`16291`)
37+
3638
Conversion
3739
^^^^^^^^^^
3840

pandas/io/common.py

+3
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,9 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
314314

315315
handles = list()
316316
f = path_or_buf
317+
318+
# Convert pathlib.Path/py.path.local or string
319+
path_or_buf = _stringify_path(path_or_buf)
317320
is_path = isinstance(path_or_buf, compat.string_types)
318321

319322
if compression:

pandas/io/formats/format.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@
3232
OrderedDict, unichr)
3333
from pandas.io.formats.terminal import get_terminal_size
3434
from pandas.core.config import get_option, set_option
35-
from pandas.io.common import _get_handle, UnicodeWriter, _expand_user
35+
from pandas.io.common import (_get_handle, UnicodeWriter, _expand_user,
36+
_stringify_path)
3637
from pandas.io.formats.printing import adjoin, justify, pprint_thing
3738
from pandas.io.formats.common import get_level_lengths
3839
import pandas.core.common as com
@@ -1475,7 +1476,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
14751476
if path_or_buf is None:
14761477
path_or_buf = StringIO()
14771478

1478-
self.path_or_buf = _expand_user(path_or_buf)
1479+
self.path_or_buf = _expand_user(_stringify_path(path_or_buf))
14791480
self.sep = sep
14801481
self.na_rep = na_rep
14811482
self.float_format = float_format

pandas/tests/io/parser/common.py

+13
Original file line numberDiff line numberDiff line change
@@ -679,6 +679,19 @@ def test_file(self):
679679

680680
tm.assert_frame_equal(url_table, local_table)
681681

682+
def test_path_pathlib(self):
    """Round-trip a frame through ``to_csv`` / ``self.read_csv`` by pathlib path.

    The writer and reader are handed to ``tm.round_trip_pathlib``; the frame
    read back must equal the one written.
    """
    expected = tm.makeDataFrame()

    def reader(path):
        # The first CSV column holds the index written by to_csv.
        return self.read_csv(path, index_col=0)

    actual = tm.round_trip_pathlib(expected.to_csv, reader)
    tm.assert_frame_equal(expected, actual)
687+
688+
def test_path_localpath(self):
    """Round-trip a frame through ``to_csv`` / ``self.read_csv`` by py.path path.

    The writer and reader are handed to ``tm.round_trip_localpath``; the
    frame read back must equal the one written.
    """
    expected = tm.makeDataFrame()

    def reader(path):
        # The first CSV column holds the index written by to_csv.
        return self.read_csv(path, index_col=0)

    actual = tm.round_trip_localpath(expected.to_csv, reader)
    tm.assert_frame_equal(expected, actual)
694+
682695
def test_nonexistent_path(self):
683696
# gh-2428: pls no segfault
684697
# gh-14086: raise more helpful FileNotFoundError

pandas/tests/io/sas/test_sas7bdat.py

+24
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import pandas.util.testing as tm
44
import os
55
import io
6+
import pytest
67
import numpy as np
78

89

@@ -65,6 +66,29 @@ def test_from_iterator(self):
6566
tm.assert_frame_equal(df, df0.iloc[2:5, :])
6667
rdr.close()
6768

69+
@pytest.mark.xfail(reason="read_sas currently doesn't work with pathlib")
def test_path_pathlib(self):
    """Read each SAS test file through a ``pathlib.Path`` and compare.

    For both data groups (0 and 1), every file index listed in
    ``self.test_ix`` is read with ``pd.read_sas`` and checked against the
    reference frame in ``self.data``. Marked xfail; see the decorator reason.
    """
    tm._skip_if_no_pathlib()
    from pathlib import Path

    for group in (0, 1):
        expected = self.data[group]
        for file_no in self.test_ix[group]:
            basename = "test%d.sas7bdat" % file_no
            sas_path = Path(os.path.join(self.dirpath, basename))
            actual = pd.read_sas(sas_path, encoding='utf-8')
            tm.assert_frame_equal(actual, expected)
79+
80+
@pytest.mark.xfail(reason="read_sas currently doesn't work with localpath")
def test_path_localpath(self):
    """Read each SAS test file through a ``py.path.local`` and compare.

    For both data groups (0 and 1), every file index listed in
    ``self.test_ix`` is read with ``pd.read_sas`` and checked against the
    reference frame in ``self.data``. Marked xfail; see the decorator reason.
    """
    tm._skip_if_no_localpath()
    from py.path import local as LocalPath

    for group in (0, 1):
        expected = self.data[group]
        for file_no in self.test_ix[group]:
            basename = "test%d.sas7bdat" % file_no
            sas_path = LocalPath(os.path.join(self.dirpath, basename))
            actual = pd.read_sas(sas_path, encoding='utf-8')
            tm.assert_frame_equal(actual, expected)
91+
6892
def test_iterator_loop(self):
6993
# github #13654
7094
for j in 0, 1:

pandas/tests/io/test_excel.py

+10
Original file line numberDiff line numberDiff line change
@@ -1858,6 +1858,16 @@ def test_freeze_panes(self):
18581858
result = read_excel(path)
18591859
tm.assert_frame_equal(expected, result)
18601860

1861+
def test_path_pathlib(self):
    """Round-trip a frame through ``to_excel`` / ``pd.read_excel`` by pathlib path."""
    expected = tm.makeDataFrame()
    actual = tm.round_trip_pathlib(expected.to_excel, pd.read_excel)
    tm.assert_frame_equal(expected, actual)
1865+
1866+
def test_path_localpath(self):
    """Round-trip a frame through ``to_excel`` / ``pd.read_excel`` by py.path path."""
    expected = tm.makeDataFrame()
    actual = tm.round_trip_localpath(expected.to_excel, pd.read_excel)
    tm.assert_frame_equal(expected, actual)
1870+
18611871

18621872
def raise_wrapper(major_ver):
18631873
def versioned_raise_wrapper(orig_method):

pandas/tests/io/test_feather.py

+13
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from feather import FeatherError
1111
from pandas.util.testing import assert_frame_equal, ensure_clean
12+
import pandas.util.testing as tm
1213

1314

1415
@pytest.mark.single
@@ -114,3 +115,15 @@ def test_write_with_index(self):
114115
df.index = [0, 1, 2]
115116
df.columns = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)]),
116117
self.check_error_on_write(df, ValueError)
118+
119+
@pytest.mark.xfail(reason="feather currently doesn't work with pathlib")
def test_path_pathlib(self):
    """Round-trip a frame through ``to_feather`` / ``pd.read_feather`` by pathlib path.

    The index is reset before writing. Marked xfail; see the decorator reason.
    """
    expected = tm.makeDataFrame().reset_index()
    actual = tm.round_trip_pathlib(expected.to_feather, pd.read_feather)
    tm.assert_frame_equal(expected, actual)
124+
125+
@pytest.mark.xfail(reason="feather currently doesn't work with localpath")
def test_path_localpath(self):
    """Round-trip a frame through ``to_feather`` / ``pd.read_feather`` by py.path path.

    The index is reset before writing. Marked xfail; see the decorator reason.
    """
    expected = tm.makeDataFrame().reset_index()
    actual = tm.round_trip_localpath(expected.to_feather, pd.read_feather)
    tm.assert_frame_equal(expected, actual)

pandas/tests/io/test_packers.py

+12
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,18 @@ def test_string_io(self):
134134
result = read_msgpack(p)
135135
tm.assert_frame_equal(result, df)
136136

137+
@pytest.mark.xfail(reason="msgpack currently doesn't work with pathlib")
def test_path_pathlib(self):
    """Round-trip a frame through ``to_msgpack`` / ``read_msgpack`` by pathlib path.

    Marked xfail; see the decorator reason.
    """
    expected = tm.makeDataFrame()
    actual = tm.round_trip_pathlib(expected.to_msgpack, read_msgpack)
    tm.assert_frame_equal(expected, actual)
142+
143+
@pytest.mark.xfail(reason="msgpack currently doesn't work with localpath")
def test_path_localpath(self):
    """Round-trip a frame through ``to_msgpack`` / ``read_msgpack`` by py.path path.

    Marked xfail; see the decorator reason.
    """
    expected = tm.makeDataFrame()
    actual = tm.round_trip_localpath(expected.to_msgpack, read_msgpack)
    tm.assert_frame_equal(expected, actual)
148+
137149
def test_iterator_with_string_io(self):
138150

139151
dfs = [DataFrame(np.random.randn(10, 2)) for i in range(5)]

pandas/tests/io/test_pickle.py

+12
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,18 @@ def test_pickle_v0_15_2():
299299
tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path))
300300

301301

302+
def test_pickle_path_pathlib():
    """Round-trip a frame through ``to_pickle`` / ``pd.read_pickle`` by pathlib path."""
    expected = tm.makeDataFrame()
    actual = tm.round_trip_pathlib(expected.to_pickle, pd.read_pickle)
    tm.assert_frame_equal(expected, actual)
306+
307+
308+
def test_pickle_path_localpath():
    """Round-trip a frame through ``to_pickle`` / ``pd.read_pickle`` by py.path path."""
    expected = tm.makeDataFrame()
    actual = tm.round_trip_localpath(expected.to_pickle, pd.read_pickle)
    tm.assert_frame_equal(expected, actual)
312+
313+
302314
# ---------------------
303315
# test pickle compression
304316
# ---------------------

pandas/tests/io/test_pytables.py

+43
Original file line numberDiff line numberDiff line change
@@ -4282,6 +4282,49 @@ def test_select_filter_corner(self):
42824282
result = store.select('frame', [crit])
42834283
tm.assert_frame_equal(result, df.loc[:, df.columns[:75:2]])
42844284

4285+
def test_path_pathlib(self):
    """Round-trip a frame through ``to_hdf`` / ``pd.read_hdf`` by pathlib path.

    The frame is stored and read back under the key ``'df'``.
    """
    expected = tm.makeDataFrame()

    def writer(path):
        return expected.to_hdf(path, 'df')

    def reader(path):
        return pd.read_hdf(path, 'df')

    actual = tm.round_trip_pathlib(writer, reader)
    tm.assert_frame_equal(expected, actual)
4292+
4293+
@pytest.mark.xfail(reason='pathlib currently doesnt work with HDFStore')
def test_path_pathlib_hdfstore(self):
    """Round-trip a frame through an ``HDFStore`` opened from a pathlib path.

    The store is opened from the path object for both the write and the
    read; the frame lives under the key ``'df'``. Marked xfail; see the
    decorator reason.
    """
    df = tm.makeDataFrame()

    def writer(path):
        with pd.HDFStore(path) as store:
            df.to_hdf(store, 'df')

    def reader(path):
        with pd.HDFStore(path) as store:
            # Return the frame: without this ``result`` below is always
            # None and the test could never pass, even once HDFStore
            # accepts path objects.
            return pd.read_hdf(store, 'df')

    result = tm.round_trip_pathlib(writer, reader)
    tm.assert_frame_equal(df, result)
4306+
4307+
def test_pickle_path_localpath(self):
    """Round-trip a frame through ``to_hdf`` / ``pd.read_hdf`` by py.path path.

    The frame is stored and read back under the key ``'df'``.
    """
    df = tm.makeDataFrame()
    # Use the py.path.local helper. The previous call to
    # ``round_trip_pathlib`` made this an exact duplicate of
    # ``test_path_pathlib`` and left py.path.local untested for HDF.
    result = tm.round_trip_localpath(
        lambda p: df.to_hdf(p, 'df'),
        lambda p: pd.read_hdf(p, 'df'))
    tm.assert_frame_equal(df, result)
4313+
4314+
@pytest.mark.xfail(reason='localpath currently doesnt work with HDFStore')
def test_path_localpath_hdfstore(self):
    """Round-trip a frame through an ``HDFStore`` opened from a py.path path.

    The store is opened from the path object for both the write and the
    read; the frame lives under the key ``'df'``. Marked xfail; see the
    decorator reason.
    """
    df = tm.makeDataFrame()

    def writer(path):
        with pd.HDFStore(path) as store:
            df.to_hdf(store, 'df')

    def reader(path):
        with pd.HDFStore(path) as store:
            # Return the frame: without this ``result`` below is always
            # None and the test could never pass, even once HDFStore
            # accepts path objects.
            return pd.read_hdf(store, 'df')

    result = tm.round_trip_localpath(writer, reader)
    tm.assert_frame_equal(df, result)
4327+
42854328
def _check_roundtrip(self, obj, comparator, compression=False, **kwargs):
42864329

42874330
options = {}

pandas/tests/io/test_stata.py

+12
Original file line numberDiff line numberDiff line change
@@ -1283,3 +1283,15 @@ def test_invalid_encoding(self):
12831283
with pytest.raises(ValueError):
12841284
with tm.ensure_clean() as path:
12851285
original.to_stata(path, encoding='utf-8')
1286+
1287+
@pytest.mark.xfail(reason="stata currently doesn't work with pathlib")
def test_path_pathlib(self):
    """Round-trip a frame through ``to_stata`` / ``read_stata`` by pathlib path.

    Marked xfail; see the decorator reason.
    """
    expected = tm.makeDataFrame()
    actual = tm.round_trip_pathlib(expected.to_stata, read_stata)
    tm.assert_frame_equal(expected, actual)
1292+
1293+
@pytest.mark.xfail(reason="stata currently doesn't work with localpath")
def test_pickle_path_localpath(self):
    """Round-trip a frame through ``to_stata`` / ``read_stata`` by py.path path.

    Despite "pickle" in the name, this exercises the Stata writer and
    reader. Marked xfail; see the decorator reason.
    """
    expected = tm.makeDataFrame()
    actual = tm.round_trip_localpath(expected.to_stata, read_stata)
    tm.assert_frame_equal(expected, actual)

pandas/util/testing.py

+57
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,63 @@ def round_trip_pickle(obj, path=None):
117117
return pd.read_pickle(path)
118118

119119

120+
def round_trip_pathlib(writer, reader, path=None):
    """
    Write an object to a file specified by a pathlib.Path and read it back

    Parameters
    ----------
    writer : callable bound to pandas object
        IO writing function (e.g. DataFrame.to_csv ); called with the Path.
    reader : callable
        IO reading function (e.g. pd.read_csv ); called with the Path.
    path : str, default None
        The path where the object is written and then read. When None,
        ``'___pathlib___'`` is used.

    Returns
    -------
    round_trip_object : pandas object
        Whatever ``reader`` returned for the file ``writer`` produced.
    """
    import pytest

    # Skips the calling test when pathlib cannot be imported.
    pathlib = pytest.importorskip('pathlib')

    target = '___pathlib___' if path is None else path
    with ensure_clean(target) as tmp_path:
        writer(pathlib.Path(tmp_path))
        round_tripped = reader(pathlib.Path(tmp_path))
    return round_tripped
147+
148+
149+
def round_trip_localpath(writer, reader, path=None):
    """
    Write an object to a file specified by a py.path LocalPath and read it back

    Parameters
    ----------
    writer : callable bound to pandas object
        IO writing function (e.g. DataFrame.to_csv ); called with the
        LocalPath.
    reader : callable
        IO reading function (e.g. pd.read_csv ); called with the LocalPath.
    path : str, default None
        The path where the object is written and then read. When None,
        ``'___localpath___'`` is used.

    Returns
    -------
    round_trip_object : pandas object
        Whatever ``reader`` returned for the file ``writer`` produced.
    """
    import pytest

    # Skips the calling test when py.path cannot be imported.
    py_path = pytest.importorskip('py.path')

    target = '___localpath___' if path is None else path
    with ensure_clean(target) as tmp_path:
        writer(py_path.local(tmp_path))
        round_tripped = reader(py_path.local(tmp_path))
    return round_tripped
175+
176+
120177
def assert_almost_equal(left, right, check_exact=False,
121178
check_dtype='equiv', check_less_precise=False,
122179
**kwargs):

0 commit comments

Comments
 (0)