diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 351aa9ebbdc32..0a3a440ced54f 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -20,6 +20,10 @@ Check the :ref:`API Changes <whatsnew_0210.api_breaking>` and :ref:`deprecations
 New features
 ~~~~~~~~~~~~
 
+- Support for `PEP 519 -- Adding a file system path protocol
+  <https://www.python.org/dev/peps/pep-0519/>`_ on most readers and writers (:issue:`13823`)
+- Added ``__fspath__`` method to :class:`pandas.HDFStore`, :class:`pandas.ExcelFile`,
+  and :class:`pandas.ExcelWriter` to work properly with the file system path protocol (:issue:`13823`)
 
 .. _whatsnew_0210.enhancements.other:
 
@@ -40,6 +44,8 @@ Backwards incompatible API changes
 
 - Support has been dropped for Python 3.4 (:issue:`15251`)
 - The Categorical constructor no longer accepts a scalar for the ``categories`` keyword. (:issue:`16022`)
+- Accessing a non-existent attribute on a closed :class:`HDFStore` will now
+  raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`)
 
 .. _whatsnew_0210.api:
 
diff --git a/pandas/io/common.py b/pandas/io/common.py
index 14ac4d366fcef..f4e12ea3fb173 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -146,8 +146,7 @@ def _validate_header_arg(header):
 
 
 def _stringify_path(filepath_or_buffer):
-    """Return the argument coerced to a string if it was a pathlib.Path
-    or a py.path.local
+    """Attempt to convert a path-like object to a string.
 
     Parameters
     ----------
@@ -155,8 +154,21 @@ def _stringify_path(filepath_or_buffer):
 
     Returns
    -------
-    str_filepath_or_buffer : a the string version of the input path
+    str_filepath_or_buffer : maybe a string version of the object
+
+    Notes
+    -----
+    Objects supporting the fspath protocol (Python 3.6+) are coerced
+    according to their __fspath__ method.
+
+    For backwards compatibility with older Python versions, pathlib.Path and
+    py.path objects are specially coerced.
+
+    Any other object is passed through unchanged, which includes bytes,
+    strings, buffers, or anything else that's not even path-like.
""" + if hasattr(filepath_or_buffer, '__fspath__'): + return filepath_or_buffer.__fspath__() if _PATHLIB_INSTALLED and isinstance(filepath_or_buffer, pathlib.Path): return text_type(filepath_or_buffer) if _PY_PATH_INSTALLED and isinstance(filepath_or_buffer, LocalPath): @@ -180,10 +192,10 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, ------- a filepath_or_buffer, the encoding, the compression """ + filepath_or_buffer = _stringify_path(filepath_or_buffer) if _is_url(filepath_or_buffer): - url = str(filepath_or_buffer) - req = _urlopen(url) + req = _urlopen(filepath_or_buffer) content_encoding = req.headers.get('Content-Encoding', None) if content_encoding == 'gzip': # Override compression based on Content-Encoding header @@ -197,9 +209,6 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, encoding=encoding, compression=compression) - # Convert pathlib.Path/py.path.local or string - filepath_or_buffer = _stringify_path(filepath_or_buffer) - if isinstance(filepath_or_buffer, (compat.string_types, compat.binary_type, mmap.mmap)): diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 9b0f49ccc45b1..fba3d7559aeaf 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -18,7 +18,8 @@ from pandas.io.parsers import TextParser from pandas.errors import EmptyDataError from pandas.io.common import (_is_url, _urlopen, _validate_header_arg, - get_filepath_or_buffer, _NA_VALUES) + get_filepath_or_buffer, _NA_VALUES, + _stringify_path) from pandas.core.indexes.period import Period import pandas._libs.json as json from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass, @@ -233,7 +234,10 @@ def __init__(self, io, **kwds): raise ImportError("pandas requires xlrd >= 0.9.0 for excel " "support, current version " + xlrd.__VERSION__) + # could be a str, ExcelFile, Book, etc. self.io = io + # Always a string + self._io = _stringify_path(io) engine = kwds.pop('engine', None) @@ -242,10 +246,10 @@ def __init__(self, io, **kwds): # If io is a url, want to keep the data as bytes so can't pass # to get_filepath_or_buffer() - if _is_url(io): - io = _urlopen(io) - elif not isinstance(io, (ExcelFile, xlrd.Book)): - io, _, _ = get_filepath_or_buffer(io) + if _is_url(self._io): + io = _urlopen(self._io) + elif not isinstance(self.io, (ExcelFile, xlrd.Book)): + io, _, _ = get_filepath_or_buffer(self._io) if engine == 'xlrd' and isinstance(io, xlrd.Book): self.book = io @@ -253,12 +257,15 @@ def __init__(self, io, **kwds): # N.B. 
xlrd.Book has a read attribute too data = io.read() self.book = xlrd.open_workbook(file_contents=data) - elif isinstance(io, compat.string_types): - self.book = xlrd.open_workbook(io) + elif isinstance(self._io, compat.string_types): + self.book = xlrd.open_workbook(self._io) else: raise ValueError('Must explicitly set engine if not passing in' ' buffer or path for io.') + def __fspath__(self): + return self._io + def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0, names=None, index_col=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, @@ -754,6 +761,9 @@ def __init__(self, path, engine=None, else: self.datetime_format = datetime_format + def __fspath__(self): + return _stringify_path(self.path) + def _get_sheet_name(self, sheet_name): if sheet_name is None: sheet_name = self.cur_sheet diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index de6d04c105376..8bdb23fc1ae6a 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -3,6 +3,7 @@ from distutils.version import LooseVersion from pandas import DataFrame, RangeIndex, Int64Index from pandas.compat import range +from pandas.io.common import _stringify_path def _try_import(): @@ -43,6 +44,7 @@ def to_feather(df, path): path : string File path """ + path = _stringify_path(path) if not isinstance(df, DataFrame): raise ValueError("feather only support IO with DataFrames") @@ -99,4 +101,5 @@ def read_feather(path): """ feather = _try_import() + path = _stringify_path(path) return feather.read_dataframe(path) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 80587f9a752c7..80c3880d39dfd 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -7,7 +7,7 @@ import numpy as np -from pandas.compat import reduce, string_types +from pandas.compat import reduce from pandas.io.formats.css import CSSResolver, CSSWarning from pandas.io.formats.printing import pprint_thing from pandas.core.dtypes.common import is_float @@ -617,9 +617,12 @@ def write(self, writer, sheet_name='Sheet1', startrow=0, and ``io.excel.xlsm.writer``. 
""" from pandas.io.excel import ExcelWriter - need_save = False - if isinstance(writer, string_types): - writer = ExcelWriter(writer, engine=engine) + from pandas.io.common import _stringify_path + + if isinstance(writer, ExcelWriter): + need_save = False + else: + writer = ExcelWriter(_stringify_path(writer), engine=engine) need_save = True formatted_cells = self.get_formatted_cells() diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 183d8d9d87d0b..054db769c56dd 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -369,7 +369,10 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, index_names=True, line_width=None, max_rows=None, max_cols=None, show_dimensions=False, decimal='.', **kwds): self.frame = frame - self.buf = _expand_user(buf) if buf is not None else StringIO() + if buf is not None: + self.buf = _expand_user(_stringify_path(buf)) + else: + self.buf = StringIO() self.show_index_names = index_names if sparsify is None: diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index b2fe074732cbb..31907ad586817 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -7,7 +7,8 @@ from pandas.compat import StringIO, long, u from pandas import compat, isnull from pandas import Series, DataFrame, to_datetime, MultiIndex -from pandas.io.common import get_filepath_or_buffer, _get_handle +from pandas.io.common import (get_filepath_or_buffer, _get_handle, + _stringify_path) from pandas.core.common import AbstractMethodError from pandas.io.formats.printing import pprint_thing from .normalize import _convert_to_line_delimits @@ -25,6 +26,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch', double_precision=10, force_ascii=True, date_unit='ms', default_handler=None, lines=False): + path_or_buf = _stringify_path(path_or_buf) if lines and orient != 'records': raise ValueError( "'lines' keyword only valid when 'orient' is records") diff --git a/pandas/io/packers.py b/pandas/io/packers.py index a4b454eda7472..a2fc4db23700c 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -61,7 +61,7 @@ from pandas.core.sparse.array import BlockIndex, IntIndex from pandas.core.generic import NDFrame from pandas.errors import PerformanceWarning -from pandas.io.common import get_filepath_or_buffer +from pandas.io.common import get_filepath_or_buffer, _stringify_path from pandas.core.internals import BlockManager, make_block, _safe_reshape import pandas.core.internals as internals @@ -149,6 +149,7 @@ def writer(fh): for a in args: fh.write(pack(a, **kwargs)) + path_or_buf = _stringify_path(path_or_buf) if isinstance(path_or_buf, compat.string_types): with open(path_or_buf, mode) as fh: writer(fh) diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 6f4c714931fc8..6f345092c514d 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -4,7 +4,7 @@ from numpy.lib.format import read_array, write_array from pandas.compat import BytesIO, cPickle as pkl, pickle_compat as pc, PY3 from pandas.core.dtypes.common import is_datetime64_dtype, _NS_DTYPE -from pandas.io.common import _get_handle, _infer_compression +from pandas.io.common import _get_handle, _infer_compression, _stringify_path def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL): @@ -34,6 +34,7 @@ def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL): """ + path = _stringify_path(path) inferred_compression = _infer_compression(path, compression) f, fh = _get_handle(path, 'wb', 
                         compression=inferred_compression,
@@ -71,7 +72,7 @@ def read_pickle(path, compression='infer'):
     -------
     unpickled : type of object stored in file
     """
-
+    path = _stringify_path(path)
     inferred_compression = _infer_compression(path, compression)
 
     def read_wrapper(func):
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index f017421c1f83a..6665ccf8ce4c5 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -309,9 +309,17 @@ def read_hdf(path_or_buf, key=None, **kwargs):
 
     if 'where' in kwargs:
         kwargs['where'] = _ensure_term(kwargs['where'], scope_level=1)
 
-    path_or_buf = _stringify_path(path_or_buf)
-    if isinstance(path_or_buf, string_types):
+    if isinstance(path_or_buf, HDFStore):
+        if not path_or_buf.is_open:
+            raise IOError('The HDFStore must be open for reading.')
+        store = path_or_buf
+        auto_close = False
+    else:
+        path_or_buf = _stringify_path(path_or_buf)
+        if not isinstance(path_or_buf, string_types):
+            raise NotImplementedError('Support for generic buffers has not '
+                                      'been implemented.')
 
         try:
             exists = os.path.exists(path_or_buf)
@@ -323,22 +331,11 @@ def read_hdf(path_or_buf, key=None, **kwargs):
             raise compat.FileNotFoundError(
                 'File %s does not exist' % path_or_buf)
 
+        store = HDFStore(path_or_buf, **kwargs)
         # can't auto open/close if we are using an iterator
         # so delegate to the iterator
-        store = HDFStore(path_or_buf, **kwargs)
         auto_close = True
 
-    elif isinstance(path_or_buf, HDFStore):
-        if not path_or_buf.is_open:
-            raise IOError('The HDFStore must be open for reading.')
-
-        store = path_or_buf
-        auto_close = False
-
-    else:
-        raise NotImplementedError('Support for generic buffers has not been '
-                                  'implemented.')
-
     try:
         if key is None:
             groups = store.groups()
@@ -440,7 +437,7 @@ def __init__(self, path, mode=None, complevel=None, complib=None,
                 "complib only supports {libs} compression.".format(
                     libs=tables.filters.all_complibs))
 
-        self._path = path
+        self._path = _stringify_path(path)
         if mode is None:
             mode = 'a'
         self._mode = mode
@@ -451,6 +448,9 @@ def __init__(self, path, mode=None, complevel=None, complib=None,
         self._filters = None
         self.open(mode=mode, **kwargs)
 
+    def __fspath__(self):
+        return self._path
+
     @property
     def root(self):
         """ return the root node """
@@ -472,7 +472,6 @@ def __delitem__(self, key):
 
     def __getattr__(self, name):
         """ allow attribute access to get stores """
-        self._check_if_open()
         try:
             return self.get(name)
         except:
diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py
index 3e4d9c9024dbd..b8a0bf5733158 100644
--- a/pandas/io/sas/sasreader.py
+++ b/pandas/io/sas/sasreader.py
@@ -2,6 +2,7 @@
 Read SAS sas7bdat or xport files.
""" from pandas import compat +from pandas.io.common import _stringify_path def read_sas(filepath_or_buffer, format=None, index=None, encoding=None, @@ -34,6 +35,7 @@ def read_sas(filepath_or_buffer, format=None, index=None, encoding=None, buffer_error_msg = ("If this is a buffer object rather " "than a string name, you must specify " "a format string") + filepath_or_buffer = _stringify_path(filepath_or_buffer) if not isinstance(filepath_or_buffer, compat.string_types): raise ValueError(buffer_error_msg) try: diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 55cac83804cd9..e03e87f09173e 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -30,7 +30,8 @@ from pandas.util._decorators import Appender import pandas as pd -from pandas.io.common import get_filepath_or_buffer, BaseIterator +from pandas.io.common import (get_filepath_or_buffer, BaseIterator, + _stringify_path) from pandas._libs.lib import max_len_string_array, infer_dtype from pandas._libs.tslib import NaT, Timestamp @@ -976,6 +977,7 @@ def __init__(self, path_or_buf, convert_dates=True, self._lines_read = 0 self._native_byteorder = _set_endianness(sys.byteorder) + path_or_buf = _stringify_path(path_or_buf) if isinstance(path_or_buf, str): path_or_buf, encoding, _ = get_filepath_or_buffer( path_or_buf, encoding=self._default_encoding @@ -1930,7 +1932,7 @@ def __init__(self, fname, data, convert_dates=None, write_index=True, if byteorder is None: byteorder = sys.byteorder self._byteorder = _set_endianness(byteorder) - self._fname = fname + self._fname = _stringify_path(fname) self.type_converters = {253: np.int32, 252: np.int16, 251: np.int8} def _write(self, to_write): diff --git a/pandas/tests/io/data/feather-0_3_1.feather b/pandas/tests/io/data/feather-0_3_1.feather new file mode 100644 index 0000000000000..5a2c7b3dcc684 Binary files /dev/null and b/pandas/tests/io/data/feather-0_3_1.feather differ diff --git a/pandas/tests/io/data/fixed_width_format.txt b/pandas/tests/io/data/fixed_width_format.txt new file mode 100644 index 0000000000000..bb487d8de7ef9 --- /dev/null +++ b/pandas/tests/io/data/fixed_width_format.txt @@ -0,0 +1,3 @@ +A B C +1 2 3 +4 5 6 diff --git a/pandas/tests/io/msgpack/data/frame.mp b/pandas/tests/io/msgpack/data/frame.mp new file mode 100644 index 0000000000000..21e20d262b26c Binary files /dev/null and b/pandas/tests/io/msgpack/data/frame.mp differ diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 7070c3c7c9382..730bf94cb2987 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -3,7 +3,6 @@ import pandas.util.testing as tm import os import io -import pytest import numpy as np @@ -66,7 +65,6 @@ def test_from_iterator(self): tm.assert_frame_equal(df, df0.iloc[2:5, :]) rdr.close() - @pytest.mark.xfail(reason="read_sas currently doesn't work with pathlib") def test_path_pathlib(self): tm._skip_if_no_pathlib() from pathlib import Path @@ -77,7 +75,6 @@ def test_path_pathlib(self): df = pd.read_sas(fname, encoding='utf-8') tm.assert_frame_equal(df, df0) - @pytest.mark.xfail(reason="read_sas currently doesn't work with localpath") def test_path_localpath(self): tm._skip_if_no_localpath() from py.path import local as LocalPath diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index a1a95e09915f1..b7d158dd75960 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -6,6 +6,7 @@ import os from os.path import isabs +import pandas as pd import 
pandas.util.testing as tm from pandas.io import common @@ -24,6 +25,18 @@ pass +class CustomFSPath(object): + """For testing fspath on unknown objects""" + def __init__(self, path): + self.path = path + + def __fspath__(self): + return self.path + + +HERE = os.path.dirname(__file__) + + class TestCommonIOCapabilities(object): data1 = """index,A,B,C,D foo,2,3,4,5 @@ -65,6 +78,11 @@ def test_stringify_path_localpath(self): lpath = LocalPath(path) assert common._stringify_path(lpath) == abs_path + def test_stringify_path_fspath(self): + p = CustomFSPath('foo/bar.csv') + result = common._stringify_path(p) + assert result == 'foo/bar.csv' + def test_get_filepath_or_buffer_with_path(self): filename = '~/sometest' filepath_or_buffer, _, _ = common.get_filepath_or_buffer(filename) @@ -89,6 +107,70 @@ def test_iterator(self): tm.assert_frame_equal(first, expected.iloc[[0]]) tm.assert_frame_equal(concat(it), expected.iloc[1:]) + @pytest.mark.parametrize('reader, module, path', [ + (pd.read_csv, 'os', os.path.join(HERE, 'data', 'iris.csv')), + (pd.read_table, 'os', os.path.join(HERE, 'data', 'iris.csv')), + (pd.read_fwf, 'os', os.path.join(HERE, 'data', + 'fixed_width_format.txt')), + (pd.read_excel, 'xlrd', os.path.join(HERE, 'data', 'test1.xlsx')), + (pd.read_feather, 'feather', os.path.join(HERE, 'data', + 'feather-0_3_1.feather')), + (pd.read_hdf, 'tables', os.path.join(HERE, 'data', 'legacy_hdf', + 'datetimetz_object.h5')), + (pd.read_stata, 'os', os.path.join(HERE, 'data', 'stata10_115.dta')), + (pd.read_sas, 'os', os.path.join(HERE, 'sas', 'data', + 'test1.sas7bdat')), + (pd.read_json, 'os', os.path.join(HERE, 'json', 'data', + 'tsframe_v012.json')), + (pd.read_msgpack, 'os', os.path.join(HERE, 'msgpack', 'data', + 'frame.mp')), + (pd.read_pickle, 'os', os.path.join(HERE, 'data', + 'categorical_0_14_1.pickle')), + ]) + def test_read_fspath_all(self, reader, module, path): + pytest.importorskip(module) + + mypath = CustomFSPath(path) + result = reader(mypath) + expected = reader(path) + if path.endswith('.pickle'): + # categorical + tm.assert_categorical_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize('writer_name, writer_kwargs, module', [ + ('to_csv', {}, 'os'), + ('to_excel', {'engine': 'xlwt'}, 'xlwt'), + ('to_feather', {}, 'feather'), + ('to_hdf', {'key': 'bar', 'mode': 'w'}, 'tables'), + ('to_html', {}, 'os'), + ('to_json', {}, 'os'), + ('to_latex', {}, 'os'), + ('to_msgpack', {}, 'os'), + ('to_pickle', {}, 'os'), + ('to_stata', {}, 'os'), + ]) + def test_write_fspath_all(self, writer_name, writer_kwargs, module): + p1 = tm.ensure_clean('string') + p2 = tm.ensure_clean('fspath') + df = pd.DataFrame({"A": [1, 2]}) + + with p1 as string, p2 as fspath: + pytest.importorskip(module) + mypath = CustomFSPath(fspath) + writer = getattr(df, writer_name) + + writer(string, **writer_kwargs) + with open(string, 'rb') as f: + expected = f.read() + + writer(mypath, **writer_kwargs) + with open(fspath, 'rb') as f: + result = f.read() + + assert result == expected + class TestMMapWrapper(object): diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index b4a5b24616728..bbf4f1107ac9e 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -2499,3 +2499,24 @@ def custom_converter(css): n_cells += 1 assert n_cells == (10 + 1) * (3 + 1) + + +class TestFSPath(object): + + @pytest.mark.skipif(sys.version_info < (3, 6), reason='requires fspath') + def test_excelfile_fspath(self): + _skip_if_no_openpyxl() + 
with tm.ensure_clean('foo.xlsx') as path: + df = DataFrame({"A": [1, 2]}) + df.to_excel(path) + xl = ExcelFile(path) + result = os.fspath(xl) + assert result == path + + @pytest.mark.skipif(sys.version_info < (3, 6), reason='requires fspath') + # @pytest.mark.xfail + def test_excelwriter_fspath(self): + _skip_if_no_openpyxl() + with tm.ensure_clean('foo.xlsx') as path: + writer = ExcelWriter(path) + assert os.fspath(writer) == str(path) diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index e3190efecba30..948ab736af6c6 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -116,13 +116,11 @@ def test_write_with_index(self): df.columns = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)]), self.check_error_on_write(df, ValueError) - @pytest.mark.xfail(reason="feather currently doesn't work with pathlib") def test_path_pathlib(self): df = tm.makeDataFrame().reset_index() result = tm.round_trip_pathlib(df.to_feather, pd.read_feather) tm.assert_frame_equal(df, result) - @pytest.mark.xfail(reason="feather currently doesn't work with localpath") def test_path_localpath(self): df = tm.makeDataFrame().reset_index() result = tm.round_trip_localpath(df.to_feather, pd.read_feather) diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index fd42becca3ac3..a28adcf1ee771 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -134,13 +134,11 @@ def test_string_io(self): result = read_msgpack(p) tm.assert_frame_equal(result, df) - @pytest.mark.xfail(reason="msgpack currently doesn't work with pathlib") def test_path_pathlib(self): df = tm.makeDataFrame() result = tm.round_trip_pathlib(df.to_msgpack, read_msgpack) tm.assert_frame_equal(df, result) - @pytest.mark.xfail(reason="msgpack currently doesn't work with localpath") def test_path_localpath(self): df = tm.makeDataFrame() result = tm.round_trip_localpath(df.to_msgpack, read_msgpack) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index c9d2da67b8ee3..17f524cc279c0 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -16,7 +16,7 @@ date_range, timedelta_range, Index, DatetimeIndex, isnull) -from pandas.compat import is_platform_windows, PY3, PY35 +from pandas.compat import is_platform_windows, PY3, PY35, BytesIO from pandas.io.formats.printing import pprint_thing tables = pytest.importorskip('tables') @@ -4290,7 +4290,6 @@ def test_path_pathlib(self): lambda p: pd.read_hdf(p, 'df')) tm.assert_frame_equal(df, result) - @pytest.mark.xfail(reason='pathlib currently doesnt work with HDFStore') def test_path_pathlib_hdfstore(self): df = tm.makeDataFrame() @@ -4300,7 +4299,8 @@ def writer(path): def reader(path): with pd.HDFStore(path) as store: - pd.read_hdf(store, 'df') + return pd.read_hdf(store, 'df') + result = tm.round_trip_pathlib(writer, reader) tm.assert_frame_equal(df, result) @@ -4311,7 +4311,6 @@ def test_pickle_path_localpath(self): lambda p: pd.read_hdf(p, 'df')) tm.assert_frame_equal(df, result) - @pytest.mark.xfail(reason='localpath currently doesnt work with HDFStore') def test_path_localpath_hdfstore(self): df = tm.makeDataFrame() @@ -4321,7 +4320,8 @@ def writer(path): def reader(path): with pd.HDFStore(path) as store: - pd.read_hdf(store, 'df') + return pd.read_hdf(store, 'df') + result = tm.round_trip_localpath(writer, reader) tm.assert_frame_equal(df, result) @@ -4453,7 +4453,7 @@ def f(): pytest.raises(ClosedFileError, lambda: 'df' in store) 
             pytest.raises(ClosedFileError, lambda: len(store))
             pytest.raises(ClosedFileError, lambda: store['df'])
-            pytest.raises(ClosedFileError, lambda: store.df)
+            pytest.raises(AttributeError, lambda: store.df)
             pytest.raises(ClosedFileError, store.select, 'df')
             pytest.raises(ClosedFileError, store.get, 'df')
             pytest.raises(ClosedFileError, store.append, 'df2', df)
@@ -5007,8 +5007,9 @@ def test_read_hdf_errors(self):
             store = HDFStore(path, mode='r')
             store.close()
             pytest.raises(IOError, read_hdf, store, 'df')
-        with open(path, mode='r') as store:
-            pytest.raises(NotImplementedError, read_hdf, store, 'df')
+
+    def test_read_hdf_generic_buffer_errors(self):
+        pytest.raises(NotImplementedError, read_hdf, BytesIO(b''), 'df')
 
     def test_invalid_complib(self):
         df = DataFrame(np.random.rand(4, 5),
@@ -5154,6 +5155,12 @@ def test_query_compare_column_type(self):
                 expected = df.loc[[], :]
                 tm.assert_frame_equal(expected, result)
 
+    @pytest.mark.skipif(sys.version_info < (3, 6), reason="Need python 3.6")
+    def test_fspath(self):
+        with tm.ensure_clean('foo.h5') as path:
+            with pd.HDFStore(path) as store:
+                assert os.fspath(store) == str(path)
+
 
 class TestHDFComplexValues(Base):
     # GH10447
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 4ec990116bb62..b9c6736563160 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -1284,14 +1284,16 @@ def test_invalid_encoding(self):
         with tm.ensure_clean() as path:
             original.to_stata(path, encoding='utf-8')
 
-    @pytest.mark.xfail(reason="stata currently doesn't work with pathlib")
     def test_path_pathlib(self):
         df = tm.makeDataFrame()
-        result = tm.round_trip_pathlib(df.to_stata, read_stata)
+        df.index.name = 'index'
+        reader = lambda x: read_stata(x).set_index('index')
+        result = tm.round_trip_pathlib(df.to_stata, reader)
         tm.assert_frame_equal(df, result)
 
-    @pytest.mark.xfail(reason="stata currently doesn't work with localpath")
     def test_pickle_path_localpath(self):
         df = tm.makeDataFrame()
-        result = tm.round_trip_localpath(df.to_stata, read_stata)
+        df.index.name = 'index'
+        reader = lambda x: read_stata(x).set_index('index')
+        result = tm.round_trip_localpath(df.to_stata, reader)
         tm.assert_frame_equal(df, result)
diff --git a/setup.py b/setup.py
index ff537d5868db6..82d5f407228a9 100755
--- a/setup.py
+++ b/setup.py
@@ -709,6 +709,7 @@ def pxd(name):
                                       'data/html_encoding/*.html',
                                       'json/data/*.json'],
              'pandas.tests.io.formats': ['data/*.csv'],
+              'pandas.tests.io.msgpack': ['data/*.mp'],
              'pandas.tests.reshape': ['data/*.csv'],
              'pandas.tests.tseries': ['data/*.pickle'],
              'pandas.io.formats': ['templates/*.tpl']
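For reference, a minimal usage sketch of what this patch enables, assuming Python 3.6+ and a pandas build that contains these changes (``MyPath`` and the file names below are purely illustrative, mirroring the ``CustomFSPath`` helper added to the tests; the HDFStore portion additionally assumes PyTables is installed):

```python
import os

import pandas as pd


class MyPath(object):
    """Any object implementing __fspath__ (PEP 519) is now accepted
    by most pandas readers and writers."""

    def __init__(self, path):
        self._path = path

    def __fspath__(self):
        return self._path


df = pd.DataFrame({"A": [1, 2]})

# Writers coerce path-like objects through their __fspath__ method
# (via _stringify_path) before opening the file.
df.to_csv(MyPath("frame.csv"), index=False)

# Readers accept the same objects.
print(pd.read_csv(MyPath("frame.csv")))

# HDFStore, ExcelFile and ExcelWriter now implement __fspath__ themselves,
# so os.fspath() works on them once they are backed by a filesystem path.
with pd.HDFStore("store.h5", mode="w") as store:
    store.put("df", df)
    print(os.fspath(store))  # -> 'store.h5'
```

On Python versions before 3.6 there is no ``os.fspath`` and no ``__fspath__`` protocol, which is why the new tests are guarded with ``sys.version_info`` skips, while ``pathlib.Path`` and ``py.path.local`` objects keep being handled through the existing special cases in ``_stringify_path``.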