diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index e0a8065d9a507..6bc63f702952a 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -33,6 +33,8 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- Bug in using ``pathlib.Path`` or ``py.path.local`` objects with io functions (:issue:`16291`) + Conversion ^^^^^^^^^^ diff --git a/pandas/io/common.py b/pandas/io/common.py index 28f90972f95de..14ac4d366fcef 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -314,6 +314,9 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None, handles = list() f = path_or_buf + + # Convert pathlib.Path/py.path.local or string + path_or_buf = _stringify_path(path_or_buf) is_path = isinstance(path_or_buf, compat.string_types) if compression: diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 65098bb2aa404..183d8d9d87d0b 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -32,7 +32,8 @@ OrderedDict, unichr) from pandas.io.formats.terminal import get_terminal_size from pandas.core.config import get_option, set_option -from pandas.io.common import _get_handle, UnicodeWriter, _expand_user +from pandas.io.common import (_get_handle, UnicodeWriter, _expand_user, + _stringify_path) from pandas.io.formats.printing import adjoin, justify, pprint_thing from pandas.io.formats.common import get_level_lengths import pandas.core.common as com @@ -1475,7 +1476,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', if path_or_buf is None: path_or_buf = StringIO() - self.path_or_buf = _expand_user(path_or_buf) + self.path_or_buf = _expand_user(_stringify_path(path_or_buf)) self.sep = sep self.na_rep = na_rep self.float_format = float_format diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index bcce0c6d020ae..31d815a4bca97 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -679,6 +679,19 @@ def test_file(self): tm.assert_frame_equal(url_table, local_table) + def test_path_pathlib(self): + df = tm.makeDataFrame() + result = tm.round_trip_pathlib(df.to_csv, + lambda p: self.read_csv(p, index_col=0)) + tm.assert_frame_equal(df, result) + + def test_path_localpath(self): + df = tm.makeDataFrame() + result = tm.round_trip_localpath( + df.to_csv, + lambda p: self.read_csv(p, index_col=0)) + tm.assert_frame_equal(df, result) + def test_nonexistent_path(self): # gh-2428: pls no segfault # gh-14086: raise more helpful FileNotFoundError diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index a5157744038f4..7070c3c7c9382 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -3,6 +3,7 @@ import pandas.util.testing as tm import os import io +import pytest import numpy as np @@ -65,6 +66,29 @@ def test_from_iterator(self): tm.assert_frame_equal(df, df0.iloc[2:5, :]) rdr.close() + @pytest.mark.xfail(reason="read_sas currently doesn't work with pathlib") + def test_path_pathlib(self): + tm._skip_if_no_pathlib() + from pathlib import Path + for j in 0, 1: + df0 = self.data[j] + for k in self.test_ix[j]: + fname = Path(os.path.join(self.dirpath, "test%d.sas7bdat" % k)) + df = pd.read_sas(fname, encoding='utf-8') + tm.assert_frame_equal(df, df0) + + @pytest.mark.xfail(reason="read_sas currently doesn't work with localpath") + def test_path_localpath(self): + tm._skip_if_no_localpath() + from py.path import local as LocalPath + for j in 0, 1: + df0 = self.data[j] + for k in self.test_ix[j]: + fname = LocalPath(os.path.join(self.dirpath, + "test%d.sas7bdat" % k)) + df = pd.read_sas(fname, encoding='utf-8') + tm.assert_frame_equal(df, df0) + def test_iterator_loop(self): # github #13654 for j in 0, 1: diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index c70b5937fea3f..b4a5b24616728 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1858,6 +1858,16 @@ def test_freeze_panes(self): result = read_excel(path) tm.assert_frame_equal(expected, result) + def test_path_pathlib(self): + df = tm.makeDataFrame() + result = tm.round_trip_pathlib(df.to_excel, pd.read_excel) + tm.assert_frame_equal(df, result) + + def test_path_localpath(self): + df = tm.makeDataFrame() + result = tm.round_trip_localpath(df.to_excel, pd.read_excel) + tm.assert_frame_equal(df, result) + def raise_wrapper(major_ver): def versioned_raise_wrapper(orig_method): diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 232bb126d9d67..e3190efecba30 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -9,6 +9,7 @@ from feather import FeatherError from pandas.util.testing import assert_frame_equal, ensure_clean +import pandas.util.testing as tm @pytest.mark.single @@ -114,3 +115,15 @@ def test_write_with_index(self): df.index = [0, 1, 2] df.columns = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)]), self.check_error_on_write(df, ValueError) + + @pytest.mark.xfail(reason="feather currently doesn't work with pathlib") + def test_path_pathlib(self): + df = tm.makeDataFrame().reset_index() + result = tm.round_trip_pathlib(df.to_feather, pd.read_feather) + tm.assert_frame_equal(df, result) + + @pytest.mark.xfail(reason="feather currently doesn't work with localpath") + def test_path_localpath(self): + df = tm.makeDataFrame().reset_index() + result = tm.round_trip_localpath(df.to_feather, pd.read_feather) + tm.assert_frame_equal(df, result) diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index 4b1145129c364..fd42becca3ac3 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -134,6 +134,18 @@ def test_string_io(self): result = read_msgpack(p) tm.assert_frame_equal(result, df) + @pytest.mark.xfail(reason="msgpack currently doesn't work with pathlib") + def test_path_pathlib(self): + df = tm.makeDataFrame() + result = tm.round_trip_pathlib(df.to_msgpack, read_msgpack) + tm.assert_frame_equal(df, result) + + @pytest.mark.xfail(reason="msgpack currently doesn't work with localpath") + def test_path_localpath(self): + df = tm.makeDataFrame() + result = tm.round_trip_localpath(df.to_msgpack, read_msgpack) + tm.assert_frame_equal(df, result) + def test_iterator_with_string_io(self): dfs = [DataFrame(np.random.randn(10, 2)) for i in range(5)] diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 875b5bd3055b9..429ec5ba1c474 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -299,6 +299,18 @@ def test_pickle_v0_15_2(): tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) +def test_pickle_path_pathlib(): + df = tm.makeDataFrame() + result = tm.round_trip_pathlib(df.to_pickle, pd.read_pickle) + tm.assert_frame_equal(df, result) + + +def test_pickle_path_localpath(): + df = tm.makeDataFrame() + result = tm.round_trip_localpath(df.to_pickle, pd.read_pickle) + tm.assert_frame_equal(df, result) + + # --------------------- # test pickle compression # --------------------- diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 873bb20b3bba9..e4f8f44f86c27 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4249,6 +4249,49 @@ def test_select_filter_corner(self): result = store.select('frame', [crit]) tm.assert_frame_equal(result, df.loc[:, df.columns[:75:2]]) + def test_path_pathlib(self): + df = tm.makeDataFrame() + + result = tm.round_trip_pathlib( + lambda p: df.to_hdf(p, 'df'), + lambda p: pd.read_hdf(p, 'df')) + tm.assert_frame_equal(df, result) + + @pytest.mark.xfail(reason='pathlib currently doesnt work with HDFStore') + def test_path_pathlib_hdfstore(self): + df = tm.makeDataFrame() + + def writer(path): + with pd.HDFStore(path) as store: + df.to_hdf(store, 'df') + + def reader(path): + with pd.HDFStore(path) as store: + pd.read_hdf(store, 'df') + result = tm.round_trip_pathlib(writer, reader) + tm.assert_frame_equal(df, result) + + def test_pickle_path_localpath(self): + df = tm.makeDataFrame() + result = tm.round_trip_pathlib( + lambda p: df.to_hdf(p, 'df'), + lambda p: pd.read_hdf(p, 'df')) + tm.assert_frame_equal(df, result) + + @pytest.mark.xfail(reason='localpath currently doesnt work with HDFStore') + def test_path_localpath_hdfstore(self): + df = tm.makeDataFrame() + + def writer(path): + with pd.HDFStore(path) as store: + df.to_hdf(store, 'df') + + def reader(path): + with pd.HDFStore(path) as store: + pd.read_hdf(store, 'df') + result = tm.round_trip_localpath(writer, reader) + tm.assert_frame_equal(df, result) + def _check_roundtrip(self, obj, comparator, compression=False, **kwargs): options = {} diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 4c92c19c51e7a..4ec990116bb62 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1283,3 +1283,15 @@ def test_invalid_encoding(self): with pytest.raises(ValueError): with tm.ensure_clean() as path: original.to_stata(path, encoding='utf-8') + + @pytest.mark.xfail(reason="stata currently doesn't work with pathlib") + def test_path_pathlib(self): + df = tm.makeDataFrame() + result = tm.round_trip_pathlib(df.to_stata, read_stata) + tm.assert_frame_equal(df, result) + + @pytest.mark.xfail(reason="stata currently doesn't work with localpath") + def test_pickle_path_localpath(self): + df = tm.makeDataFrame() + result = tm.round_trip_localpath(df.to_stata, read_stata) + tm.assert_frame_equal(df, result) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 5f01f42eb0c69..81d452318d0b4 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -117,6 +117,63 @@ def round_trip_pickle(obj, path=None): return pd.read_pickle(path) +def round_trip_pathlib(writer, reader, path=None): + """ + Write an object to file specifed by a pathlib.Path and read it back + + Parameters + ---------- + writer : callable bound to pandas object + IO writing function (e.g. DataFrame.to_csv ) + reader : callable + IO reading function (e.g. pd.read_csv ) + path : str, default None + The path where the object is written and then read. + + Returns + ------- + round_trip_object : pandas object + The original object that was serialized and then re-read. + """ + + import pytest + Path = pytest.importorskip('pathlib').Path + if path is None: + path = '___pathlib___' + with ensure_clean(path) as path: + writer(Path(path)) + obj = reader(Path(path)) + return obj + + +def round_trip_localpath(writer, reader, path=None): + """ + Write an object to file specifed by a py.path LocalPath and read it back + + Parameters + ---------- + writer : callable bound to pandas object + IO writing function (e.g. DataFrame.to_csv ) + reader : callable + IO reading function (e.g. pd.read_csv ) + path : str, default None + The path where the object is written and then read. + + Returns + ------- + round_trip_object : pandas object + The original object that was serialized and then re-read. + """ + import pytest + LocalPath = pytest.importorskip('py.path').local + if path is None: + path = '___localpath___' + with ensure_clean(path) as path: + writer(LocalPath(path)) + obj = reader(LocalPath(path)) + return obj + + def assert_almost_equal(left, right, check_exact=False, check_dtype='equiv', check_less_precise=False, **kwargs):