diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index e85ba505887b4..4b2999416ffbe 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -86,6 +86,8 @@ Documentation Changes Bug Fixes ~~~~~~~~~ +- Bug in ``pd.read_msgpack()`` when a non-existent file is passed in Python 2 (:issue:`15296`) + Conversion ^^^^^^^^^^ diff --git a/pandas/io/packers.py b/pandas/io/packers.py index abd258034af99..ef65a3275060b 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -192,7 +192,6 @@ def read(fh): # see if we have an actual file if isinstance(path_or_buf, compat.string_types): - try: exists = os.path.exists(path_or_buf) except (TypeError, ValueError): @@ -202,18 +201,21 @@ def read(fh): with open(path_or_buf, 'rb') as fh: return read(fh) - # treat as a binary-like if isinstance(path_or_buf, compat.binary_type): + # treat as a binary-like fh = None try: - fh = compat.BytesIO(path_or_buf) - return read(fh) + # We can't distinguish between a path and a buffer of bytes in + # Python 2 so instead assume the first byte of a valid path is + # less than 0x80. 
+ if compat.PY3 or ord(path_or_buf[0]) >= 0x80: + fh = compat.BytesIO(path_or_buf) + return read(fh) finally: if fh is not None: fh.close() - - # a buffer like - if hasattr(path_or_buf, 'read') and compat.callable(path_or_buf.read): + elif hasattr(path_or_buf, 'read') and compat.callable(path_or_buf.read): + # treat as a buffer like return read(path_or_buf) raise ValueError('path_or_buf needs to be a string file path or file-like') diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 30904593fedc4..707580bfe9601 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -10,7 +10,7 @@ import pandas.util.testing as tm from pandas.io import common -from pandas.compat import is_platform_windows, StringIO +from pandas.compat import is_platform_windows, StringIO, FileNotFoundError from pandas import read_csv, concat @@ -125,6 +125,26 @@ def test_iterator(self): tm.assert_frame_equal(first, expected.iloc[[0]]) tm.assert_frame_equal(concat(it), expected.iloc[1:]) + @pytest.mark.parametrize('reader, module, error_class, fn_ext', [ + (pd.read_csv, 'os', FileNotFoundError, 'csv'), + (pd.read_table, 'os', FileNotFoundError, 'csv'), + (pd.read_fwf, 'os', FileNotFoundError, 'txt'), + (pd.read_excel, 'xlrd', FileNotFoundError, 'xlsx'), + (pd.read_feather, 'feather', Exception, 'feather'), + (pd.read_hdf, 'tables', FileNotFoundError, 'h5'), + (pd.read_stata, 'os', FileNotFoundError, 'dta'), + (pd.read_sas, 'os', FileNotFoundError, 'sas7bdat'), + (pd.read_json, 'os', ValueError, 'json'), + (pd.read_msgpack, 'os', ValueError, 'mp'), + (pd.read_pickle, 'os', FileNotFoundError, 'pickle'), + ]) + def test_read_non_existant(self, reader, module, error_class, fn_ext): + pytest.importorskip(module) + + path = os.path.join(HERE, 'data', 'does_not_exist.' 
+ fn_ext) + with pytest.raises(error_class): + reader(path) + @pytest.mark.parametrize('reader, module, path', [ (pd.read_csv, 'os', os.path.join(HERE, 'data', 'iris.csv')), (pd.read_table, 'os', os.path.join(HERE, 'data', 'iris.csv')),