Skip to content

BUG: pathlib.Path in io #16292

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
May 12, 2017
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.20.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ Performance Improvements
Bug Fixes
~~~~~~~~~

- Bug in using ``pathlib.Path`` or ``py.path.local`` objects with io functions (:issue:`16291`)

Conversion
^^^^^^^^^^

Expand Down
3 changes: 3 additions & 0 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,9 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,

handles = list()
f = path_or_buf

# Convert pathlib.Path/py.path.local or string
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a blank line

path_or_buf = _stringify_path(path_or_buf)
is_path = isinstance(path_or_buf, compat.string_types)

if compression:
Expand Down
5 changes: 3 additions & 2 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
OrderedDict, unichr)
from pandas.io.formats.terminal import get_terminal_size
from pandas.core.config import get_option, set_option
from pandas.io.common import _get_handle, UnicodeWriter, _expand_user
from pandas.io.common import (_get_handle, UnicodeWriter, _expand_user,
_stringify_path)
from pandas.io.formats.printing import adjoin, justify, pprint_thing
from pandas.io.formats.common import get_level_lengths
import pandas.core.common as com
Expand Down Expand Up @@ -1475,7 +1476,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
if path_or_buf is None:
path_or_buf = StringIO()

self.path_or_buf = _expand_user(path_or_buf)
self.path_or_buf = _expand_user(_stringify_path(path_or_buf))
self.sep = sep
self.na_rep = na_rep
self.float_format = float_format
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/io/parser/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,19 @@ def test_file(self):

tm.assert_frame_equal(url_table, local_table)

def test_path_pathlib(self):
    # A frame written with to_csv to a pathlib.Path must read back
    # unchanged through this parser's read_csv.
    expected = tm.makeDataFrame()

    def read_back(p):
        return self.read_csv(p, index_col=0)

    actual = tm.round_trip_pathlib(expected.to_csv, read_back)
    tm.assert_frame_equal(expected, actual)

def test_path_localpath(self):
    # A frame written with to_csv to a py.path.local must read back
    # unchanged through this parser's read_csv.
    expected = tm.makeDataFrame()

    def read_back(p):
        return self.read_csv(p, index_col=0)

    actual = tm.round_trip_localpath(expected.to_csv, read_back)
    tm.assert_frame_equal(expected, actual)

def test_nonexistent_path(self):
# gh-2428: pls no segfault
# gh-14086: raise more helpful FileNotFoundError
Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/io/sas/test_sas7bdat.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pandas.util.testing as tm
import os
import io
import pytest
import numpy as np


Expand Down Expand Up @@ -65,6 +66,29 @@ def test_from_iterator(self):
tm.assert_frame_equal(df, df0.iloc[2:5, :])
rdr.close()

@pytest.mark.xfail(reason="read_sas currently doesn't work with pathlib")
def test_path_pathlib(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks like needs xfailing here

tm._skip_if_no_pathlib()
from pathlib import Path
for j in 0, 1:
df0 = self.data[j]
for k in self.test_ix[j]:
fname = Path(os.path.join(self.dirpath, "test%d.sas7bdat" % k))
df = pd.read_sas(fname, encoding='utf-8')
tm.assert_frame_equal(df, df0)

@pytest.mark.xfail(reason="read_sas currently doesn't work with localpath")
def test_path_localpath(self):
    # Read every SAS fixture through a py.path.local object and compare it
    # with the expected frame of its group.
    tm._skip_if_no_localpath()
    from py.path import local as LocalPath

    for group in (0, 1):
        expected = self.data[group]
        for file_no in self.test_ix[group]:
            basename = "test%d.sas7bdat" % file_no
            sas_path = LocalPath(os.path.join(self.dirpath, basename))
            actual = pd.read_sas(sas_path, encoding='utf-8')
            tm.assert_frame_equal(actual, expected)

def test_iterator_loop(self):
# github #13654
for j in 0, 1:
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/io/test_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1858,6 +1858,16 @@ def test_freeze_panes(self):
result = read_excel(path)
tm.assert_frame_equal(expected, result)

def test_path_pathlib(self):
    # Excel round trip where the file is addressed by a pathlib.Path.
    expected = tm.makeDataFrame()
    writer, reader = expected.to_excel, pd.read_excel
    actual = tm.round_trip_pathlib(writer, reader)
    tm.assert_frame_equal(expected, actual)

def test_pickle_path_localpath(self):
    # Excel round trip where the file is addressed by a py.path.local.
    expected = tm.makeDataFrame()
    writer, reader = expected.to_excel, pd.read_excel
    actual = tm.round_trip_localpath(writer, reader)
    tm.assert_frame_equal(expected, actual)


def raise_wrapper(major_ver):
def versioned_raise_wrapper(orig_method):
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/io/test_packers.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,18 @@ def test_string_io(self):
result = read_msgpack(p)
tm.assert_frame_equal(result, df)

@pytest.mark.xfail(reason="msgpack currently doesn't work with pathlib")
def test_path_pathlib(self):
    # msgpack round trip where the file is addressed by a pathlib.Path.
    expected = tm.makeDataFrame()
    writer = expected.to_msgpack
    actual = tm.round_trip_pathlib(writer, read_msgpack)
    tm.assert_frame_equal(expected, actual)

@pytest.mark.xfail(reason="msgpack currently doesn't work with localpath")
def test_pickle_path_localpath(self):
    # msgpack round trip where the file is addressed by a py.path.local.
    expected = tm.makeDataFrame()
    writer = expected.to_msgpack
    actual = tm.round_trip_localpath(writer, read_msgpack)
    tm.assert_frame_equal(expected, actual)

def test_iterator_with_string_io(self):

dfs = [DataFrame(np.random.randn(10, 2)) for i in range(5)]
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/io/test_pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,17 @@ def test_pickle_v0_15_2():
tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path))


def test_pickle_path_pathlib():
    # Pickle round trip where the file is addressed by a pathlib.Path.
    expected = tm.makeDataFrame()
    writer, reader = expected.to_pickle, pd.read_pickle
    actual = tm.round_trip_pathlib(writer, reader)
    tm.assert_frame_equal(expected, actual)


def test_pickle_path_localpath():
    # Pickle round trip where the file is addressed by a py.path.local.
    expected = tm.makeDataFrame()
    writer, reader = expected.to_pickle, pd.read_pickle
    actual = tm.round_trip_localpath(writer, reader)
    tm.assert_frame_equal(expected, actual)

# ---------------------
# test pickle compression
# ---------------------
Expand Down
43 changes: 43 additions & 0 deletions pandas/tests/io/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4249,6 +4249,49 @@ def test_select_filter_corner(self):
result = store.select('frame', [crit])
tm.assert_frame_equal(result, df.loc[:, df.columns[:75:2]])

def test_path_pathlib(self):
    # to_hdf / read_hdf round trip under key 'df', with the file
    # addressed by a pathlib.Path.
    expected = tm.makeDataFrame()

    def write_hdf(p):
        return expected.to_hdf(p, 'df')

    def read_back(p):
        return pd.read_hdf(p, 'df')

    actual = tm.round_trip_pathlib(write_hdf, read_back)
    tm.assert_frame_equal(expected, actual)

@pytest.mark.xfail(reason='pathlib currently doesnt work with HDFStore')
def test_path_pathlib_hdfstore(self):
    # Round-trip a frame through an HDFStore opened from a pathlib.Path.
    df = tm.makeDataFrame()

    def writer(path):
        # Store the frame under key 'df' in a store opened from ``path``.
        with pd.HDFStore(path) as store:
            df.to_hdf(store, 'df')

    def reader(path):
        # The frame must be returned: round_trip_pathlib hands back
        # whatever the reader returns, and without the ``return`` the
        # comparison below would always be made against None.
        with pd.HDFStore(path) as store:
            return pd.read_hdf(store, 'df')

    result = tm.round_trip_pathlib(writer, reader)
    tm.assert_frame_equal(df, result)

def test_pickle_path_localpath(self):
    # to_hdf / read_hdf round trip under key 'df', with the file addressed
    # by a py.path.local. This must go through round_trip_localpath; the
    # pathlib helper would only repeat the pathlib.Path test and leave
    # py.path.local untested.
    df = tm.makeDataFrame()
    result = tm.round_trip_localpath(
        lambda p: df.to_hdf(p, 'df'),
        lambda p: pd.read_hdf(p, 'df'))
    tm.assert_frame_equal(df, result)

@pytest.mark.xfail(reason='localpath currently doesnt work with HDFStore')
def test_path_localpath_hdfstore(self):
    # Round-trip a frame through an HDFStore opened from a py.path.local.
    df = tm.makeDataFrame()

    def writer(path):
        # Store the frame under key 'df' in a store opened from ``path``.
        with pd.HDFStore(path) as store:
            df.to_hdf(store, 'df')

    def reader(path):
        # The frame must be returned: round_trip_localpath hands back
        # whatever the reader returns, and without the ``return`` the
        # comparison below would always be made against None.
        with pd.HDFStore(path) as store:
            return pd.read_hdf(store, 'df')

    result = tm.round_trip_localpath(writer, reader)
    tm.assert_frame_equal(df, result)

def _check_roundtrip(self, obj, comparator, compression=False, **kwargs):

options = {}
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/io/test_stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -1283,3 +1283,15 @@ def test_invalid_encoding(self):
with pytest.raises(ValueError):
with tm.ensure_clean() as path:
original.to_stata(path, encoding='utf-8')

@pytest.mark.xfail(reason="stata currently doesn't work with pathlib")
def test_path_pathlib(self):
    # Stata round trip where the file is addressed by a pathlib.Path.
    expected = tm.makeDataFrame()
    writer = expected.to_stata
    actual = tm.round_trip_pathlib(writer, read_stata)
    tm.assert_frame_equal(expected, actual)

@pytest.mark.xfail(reason="stata currently doesn't work with localpath")
def test_pickle_path_localpath(self):
    # Stata round trip where the file is addressed by a py.path.local.
    expected = tm.makeDataFrame()
    writer = expected.to_stata
    actual = tm.round_trip_localpath(writer, read_stata)
    tm.assert_frame_equal(expected, actual)
57 changes: 57 additions & 0 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,63 @@ def round_trip_pickle(obj, path=None):
return pd.read_pickle(path)


def round_trip_pathlib(writer, reader, path=None):
    """
    Round-trip an object through a file addressed by a ``pathlib.Path``.

    Parameters
    ----------
    writer : callable bound to pandas object
        IO writing function (e.g. DataFrame.to_csv ). It is called with
        a single ``pathlib.Path`` argument.
    reader : callable
        IO reading function (e.g. pd.read_csv ). It is called with a
        single ``pathlib.Path`` argument.
    path : str, default None
        Name passed to ``ensure_clean``; ``'___pathlib___'`` is used when
        it is omitted.

    Returns
    -------
    round_trip_object : pandas object
        Whatever ``reader`` returns after ``writer`` has run.
    """
    import pytest

    # importorskip skips the calling test when pathlib cannot be imported.
    pathlib = pytest.importorskip('pathlib')
    filename = '___pathlib___' if path is None else path
    with ensure_clean(filename) as tmp_path:
        writer(pathlib.Path(tmp_path))
        round_tripped = reader(pathlib.Path(tmp_path))
    return round_tripped


def round_trip_localpath(writer, reader, path=None):
    """
    Round-trip an object through a file addressed by a ``py.path.local``.

    Parameters
    ----------
    writer : callable bound to pandas object
        IO writing function (e.g. DataFrame.to_csv ). It is called with
        a single ``py.path.local`` argument.
    reader : callable
        IO reading function (e.g. pd.read_csv ). It is called with a
        single ``py.path.local`` argument.
    path : str, default None
        Name passed to ``ensure_clean``; ``'___localpath___'`` is used
        when it is omitted.

    Returns
    -------
    round_trip_object : pandas object
        Whatever ``reader`` returns after ``writer`` has run.
    """
    import pytest

    # importorskip skips the calling test when py.path cannot be imported.
    py_path = pytest.importorskip('py.path')
    filename = '___localpath___' if path is None else path
    with ensure_clean(filename) as tmp_path:
        writer(py_path.local(tmp_path))
        round_tripped = reader(py_path.local(tmp_path))
    return round_tripped


def assert_almost_equal(left, right, check_exact=False,
check_dtype='equiv', check_less_precise=False,
**kwargs):
Expand Down