Skip to content

BUG: pathlib.Path in io #16292

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
May 12, 2017
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.20.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ Performance Improvements
Bug Fixes
~~~~~~~~~

- Bug in using ``pathlib.Path`` or ``py.path.local`` objects with io functions (:issue:`16291`)

Conversion
^^^^^^^^^^

Expand Down
3 changes: 3 additions & 0 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,9 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,

handles = list()
f = path_or_buf

# Convert pathlib.Path/py.path.local or string
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a blank line

path_or_buf = _stringify_path(path_or_buf)
is_path = isinstance(path_or_buf, compat.string_types)

if compression:
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/io/parser/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,18 @@ def test_file(self):

tm.assert_frame_equal(url_table, local_table)

def test_path_pathlib(self):
    """CSV round trip where both writer and reader get a ``pathlib.Path``."""
    def read_back(p):
        # The written index comes back as the first column.
        return self.read_csv(p, index_col=0)

    expected = tm.makeDataFrame()
    actual = tm.round_trip_pathlib(expected.to_csv, read_back)
    tm.assert_frame_equal(expected, actual)

def test_pickle_path_localpath(self):
    """CSV round trip where writer and reader get a ``py.path.local``."""
    df = tm.makeDataFrame()
    # Must go through the LocalPath helper: calling round_trip_pathlib here
    # would leave py.path.local handling completely untested.
    result = tm.round_trip_localpath(
        df.to_csv,
        lambda p: self.read_csv(p, index_col=0))
    tm.assert_frame_equal(df, result)

def test_nonexistent_path(self):
# gh-2428: pls no segfault
# gh-14086: raise more helpful FileNotFoundError
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/io/sas/test_sas7bdat.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,26 @@ def test_from_iterator(self):
tm.assert_frame_equal(df, df0.iloc[2:5, :])
rdr.close()

def test_path_pathlib(self):
    """Read every SAS fixture through a ``pathlib.Path`` and compare it
    with the matching frame in ``self.data``."""
    # NOTE(review): a reviewer suggested this test may need xfailing -- confirm.
    tm._skip_if_no_pathlib()
    from pathlib import Path

    for group in (0, 1):
        expected = self.data[group]
        for file_no in self.test_ix[group]:
            base_name = "test%d.sas7bdat" % file_no
            sas_path = Path(os.path.join(self.dirpath, base_name))
            actual = pd.read_sas(sas_path, encoding='utf-8')
            tm.assert_frame_equal(actual, expected)

def test_path_localpath(self):
    """Read every SAS fixture through a ``py.path.local`` and compare it
    with the matching frame in ``self.data``."""
    tm._skip_if_no_localpath()
    from py.path import local as LocalPath

    for group in (0, 1):
        expected = self.data[group]
        for file_no in self.test_ix[group]:
            base_name = "test%d.sas7bdat" % file_no
            sas_path = LocalPath(os.path.join(self.dirpath, base_name))
            actual = pd.read_sas(sas_path, encoding='utf-8')
            tm.assert_frame_equal(actual, expected)

def test_iterator_loop(self):
# github #13654
for j in 0, 1:
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/io/test_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1858,6 +1858,16 @@ def test_freeze_panes(self):
result = read_excel(path)
tm.assert_frame_equal(expected, result)

def test_path_pathlib(self):
    """Excel round trip (``to_excel`` / ``pd.read_excel``) via ``pathlib.Path``."""
    expected = tm.makeDataFrame()
    writer, reader = expected.to_excel, pd.read_excel
    actual = tm.round_trip_pathlib(writer, reader)
    tm.assert_frame_equal(expected, actual)

def test_pickle_path_localpath(self):
    """Excel round trip (``to_excel`` / ``pd.read_excel``) via ``py.path.local``."""
    expected = tm.makeDataFrame()
    writer, reader = expected.to_excel, pd.read_excel
    actual = tm.round_trip_localpath(writer, reader)
    tm.assert_frame_equal(expected, actual)


def raise_wrapper(major_ver):
def versioned_raise_wrapper(orig_method):
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/io/test_packers.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,18 @@ def test_string_io(self):
result = read_msgpack(p)
tm.assert_frame_equal(result, df)

@pytest.mark.xfail(reason="msgpack currently doesn't work with pathlib")
def test_path_pathlib(self):
    """msgpack round trip via ``pathlib.Path``; marked as an expected failure."""
    expected = tm.makeDataFrame()
    actual = tm.round_trip_pathlib(expected.to_msgpack, read_msgpack)
    tm.assert_frame_equal(expected, actual)

@pytest.mark.xfail(reason="msgpack currently doesn't work with localpath")
def test_pickle_path_localpath(self):
    """msgpack round trip via ``py.path.local``; marked as an expected failure."""
    expected = tm.makeDataFrame()
    actual = tm.round_trip_localpath(expected.to_msgpack, read_msgpack)
    tm.assert_frame_equal(expected, actual)

def test_iterator_with_string_io(self):

dfs = [DataFrame(np.random.randn(10, 2)) for i in range(5)]
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/io/test_pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ def python_unpickler(path):
compare_element(result, expected, typ)



def test_pickle_v0_14_1():

cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False,
Expand Down Expand Up @@ -299,6 +300,17 @@ def test_pickle_v0_15_2():
tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path))


def test_pickle_path_pathlib():
    """Pickle round trip (``to_pickle`` / ``pd.read_pickle``) via ``pathlib.Path``."""
    expected = tm.makeDataFrame()
    actual = tm.round_trip_pathlib(expected.to_pickle, pd.read_pickle)
    tm.assert_frame_equal(expected, actual)


def test_pickle_path_localpath():
    """Pickle round trip (``to_pickle`` / ``pd.read_pickle``) via ``py.path.local``."""
    expected = tm.makeDataFrame()
    actual = tm.round_trip_localpath(expected.to_pickle, pd.read_pickle)
    tm.assert_frame_equal(expected, actual)

# ---------------------
# test pickle compression
# ---------------------
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/io/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4249,6 +4249,18 @@ def test_select_filter_corner(self):
result = store.select('frame', [crit])
tm.assert_frame_equal(result, df.loc[:, df.columns[:75:2]])

def test_path_pathlib(self):
    """HDF round trip under key ``'df'`` using ``pathlib.Path`` paths."""
    expected = tm.makeDataFrame()

    def write_hdf(p):
        return expected.to_hdf(p, 'df')

    def read_hdf(p):
        return pd.read_hdf(p, 'df')

    actual = tm.round_trip_pathlib(write_hdf, read_hdf)
    tm.assert_frame_equal(expected, actual)

def test_pickle_path_localpath(self):
    """HDF round trip under key ``'df'`` using ``py.path.local`` paths."""
    df = tm.makeDataFrame()
    # Must go through the LocalPath helper: calling round_trip_pathlib here
    # would leave py.path.local handling completely untested.
    result = tm.round_trip_localpath(lambda p: df.to_hdf(p, 'df'),
                                     lambda p: pd.read_hdf(p, 'df'))
    tm.assert_frame_equal(df, result)

def _check_roundtrip(self, obj, comparator, compression=False, **kwargs):

options = {}
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/io/test_stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -1283,3 +1283,15 @@ def test_invalid_encoding(self):
with pytest.raises(ValueError):
with tm.ensure_clean() as path:
original.to_stata(path, encoding='utf-8')

@pytest.mark.xfail(reason="stata currently doesn't work with pathlib")
def test_path_pathlib(self):
    """Stata round trip via ``pathlib.Path``; marked as an expected failure."""
    expected = tm.makeDataFrame()
    actual = tm.round_trip_pathlib(expected.to_stata, read_stata)
    tm.assert_frame_equal(expected, actual)

@pytest.mark.xfail(reason="stata currently doesn't work with localpath")
def test_pickle_path_localpath(self):
    """Stata round trip via ``py.path.local``; marked as an expected failure."""
    expected = tm.makeDataFrame()
    actual = tm.round_trip_localpath(expected.to_stata, read_stata)
    tm.assert_frame_equal(expected, actual)
57 changes: 57 additions & 0 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,63 @@ def round_trip_pickle(obj, path=None):
return pd.read_pickle(path)


def round_trip_pathlib(writer, reader, path=None):
    """
    Write through ``writer`` and read back through ``reader``, addressing
    the file with a ``pathlib.Path`` in both directions.

    Parameters
    ----------
    writer : callable
        Called with one ``pathlib.Path`` argument to produce the file
        (e.g. a bound ``DataFrame.to_csv``).
    reader : callable
        Called with one ``pathlib.Path`` argument to load the file
        (e.g. ``pd.read_csv``).
    path : str, default None
        Name handed to ``ensure_clean``; ``'___pathlib___'`` when None.

    Returns
    -------
    object
        Whatever ``reader`` returned.

    Notes
    -----
    ``pathlib`` is obtained through ``pytest.importorskip``.
    """
    import pytest

    pathlib_mod = pytest.importorskip('pathlib')
    target = '___pathlib___' if path is None else path
    with ensure_clean(target) as tmp_name:
        writer(pathlib_mod.Path(tmp_name))
        loaded = reader(pathlib_mod.Path(tmp_name))
    return loaded


def round_trip_localpath(writer, reader, path=None):
    """
    Write through ``writer`` and read back through ``reader``, addressing
    the file with a ``py.path.local`` in both directions.

    Parameters
    ----------
    writer : callable
        Called with one ``py.path.local`` argument to produce the file
        (e.g. a bound ``DataFrame.to_csv``).
    reader : callable
        Called with one ``py.path.local`` argument to load the file
        (e.g. ``pd.read_csv``).
    path : str, default None
        Name handed to ``ensure_clean``; ``'___localpath___'`` when None.

    Returns
    -------
    object
        Whatever ``reader`` returned.

    Notes
    -----
    ``py.path`` is obtained through ``pytest.importorskip``.
    """
    import pytest

    py_path_mod = pytest.importorskip('py.path')
    target = '___localpath___' if path is None else path
    with ensure_clean(target) as tmp_name:
        writer(py_path_mod.local(tmp_name))
        loaded = reader(py_path_mod.local(tmp_name))
    return loaded


def assert_almost_equal(left, right, check_exact=False,
check_dtype='equiv', check_less_precise=False,
**kwargs):
Expand Down