From 1ab776ee3cdefcac86be945b18e9f9750abfa8af Mon Sep 17 00:00:00 2001 From: Mike Taves Date: Mon, 6 Sep 2021 15:28:51 +1200 Subject: [PATCH 1/4] MAINT: rename IOError -> OSError --- pandas/_testing/_io.py | 15 ++++++++------- pandas/io/sql.py | 2 +- pandas/tests/io/formats/test_console.py | 6 +++--- pandas/tests/io/parser/test_network.py | 4 ++-- pandas/tests/io/pytables/test_errors.py | 4 ++-- pandas/tests/io/pytables/test_file_handling.py | 6 +++--- pandas/tests/io/test_common.py | 4 ++-- 7 files changed, 21 insertions(+), 20 deletions(-) diff --git a/pandas/_testing/_io.py b/pandas/_testing/_io.py index a0b6963cfac97..32224cdd4d04e 100644 --- a/pandas/_testing/_io.py +++ b/pandas/_testing/_io.py @@ -70,7 +70,7 @@ def _get_default_network_errors(): # Lazy import for http.client because it imports many things from the stdlib import http.client - return (IOError, http.client.HTTPException, TimeoutError) + return (OSError, http.client.HTTPException, TimeoutError) def optional_args(decorator): @@ -135,7 +135,7 @@ def network( If True, checks connectivity before running the test case. error_classes : tuple or Exception error classes to ignore. If not in ``error_classes``, raises the error. - defaults to IOError. Be careful about changing the error classes here. + defaults to OSError. Be careful about changing the error classes here. skip_errnos : iterable of int Any exception that has .errno or .reason.erno set to one of these values will be skipped with an appropriate @@ -165,19 +165,20 @@ def network( ... def test_network(): ... with pd.io.common.urlopen("rabbit://bonanza.com"): ... pass + >>> test_network() Traceback ... - URLError: + URLError: You can specify alternative URLs:: >>> @ts.network("https://www.yahoo.com") ... def test_something_with_yahoo(): - ... raise IOError("Failure Message") + ... raise OSError("Failure Message") >>> test_something_with_yahoo() Traceback (most recent call last): ... - IOError: Failure Message + OSError: Failure Message If you set check_before_test, it will check the url first and not run the test on failure:: @@ -241,7 +242,7 @@ def wrapper(*args, **kwargs): def can_connect(url, error_classes=None): """ - Try to connect to the given url. True if succeeds, False if IOError + Try to connect to the given url. True if succeeds, False if OSError raised Parameters @@ -252,7 +253,7 @@ def can_connect(url, error_classes=None): Returns ------- connectable : bool - Return True if no IOError (unable to connect) or URLError (bad url) was + Return True if no OSError (unable to connect) or URLError (bad url) was raised """ if error_classes is None: diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 45444852c99a6..ec5262ee3a04c 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -47,7 +47,7 @@ from pandas.util.version import Version -class DatabaseError(IOError): +class DatabaseError(OSError): pass diff --git a/pandas/tests/io/formats/test_console.py b/pandas/tests/io/formats/test_console.py index b57a2393461a2..39674db6916c1 100644 --- a/pandas/tests/io/formats/test_console.py +++ b/pandas/tests/io/formats/test_console.py @@ -39,7 +39,7 @@ def test_detect_console_encoding_from_stdout_stdin(monkeypatch, empty, filled): assert detect_console_encoding() == filled -@pytest.mark.parametrize("encoding", [AttributeError, IOError, "ascii"]) +@pytest.mark.parametrize("encoding", [AttributeError, OSError, "ascii"]) def test_detect_console_encoding_fallback_to_locale(monkeypatch, encoding): # GH 21552 with monkeypatch.context() as context: @@ -55,8 +55,8 @@ def test_detect_console_encoding_fallback_to_locale(monkeypatch, encoding): ["ascii", locale.Error], [AttributeError, "ascii"], [AttributeError, locale.Error], - [IOError, "ascii"], - [IOError, locale.Error], + [OSError, "ascii"], + [OSError, locale.Error], ], ) def test_detect_console_encoding_fallback_to_default(monkeypatch, std, locale): diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index 497dd74d2a9a4..36f53bb1bb155 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -204,12 +204,12 @@ def test_parse_public_s3_bucket_nrows_python(self, tips_df, s3so): def test_read_s3_fails(self, s3so): msg = "The specified bucket does not exist" - with pytest.raises(IOError, match=msg): + with pytest.raises(OSError, match=msg): read_csv("s3://nyqpug/asdf.csv", storage_options=s3so) # Receive a permission error when trying to read a private bucket. # It's irrelevant here that this isn't actually a table. - with pytest.raises(IOError, match=msg): + with pytest.raises(OSError, match=msg): read_csv("s3://cant_get_it/file.csv") @pytest.mark.xfail(reason="GH#39155 s3fs upgrade", strict=False) diff --git a/pandas/tests/io/pytables/test_errors.py b/pandas/tests/io/pytables/test_errors.py index 2ae330e5139be..dbcd112a28aca 100644 --- a/pandas/tests/io/pytables/test_errors.py +++ b/pandas/tests/io/pytables/test_errors.py @@ -214,7 +214,7 @@ def test_read_hdf_errors(setup_path): with ensure_clean_path(setup_path) as path: msg = r"File [\S]* does not exist" - with pytest.raises(IOError, match=msg): + with pytest.raises(OSError, match=msg): read_hdf(path, "key") df.to_hdf(path, "df") @@ -222,7 +222,7 @@ def test_read_hdf_errors(setup_path): store.close() msg = "The HDFStore must be open for reading." - with pytest.raises(IOError, match=msg): + with pytest.raises(OSError, match=msg): read_hdf(store, "df") diff --git a/pandas/tests/io/pytables/test_file_handling.py b/pandas/tests/io/pytables/test_file_handling.py index 88e2b5f080282..df0f7b0951b7d 100644 --- a/pandas/tests/io/pytables/test_file_handling.py +++ b/pandas/tests/io/pytables/test_file_handling.py @@ -40,7 +40,7 @@ def check(mode): # constructor if mode in ["r", "r+"]: - with pytest.raises(IOError, match=msg): + with pytest.raises(OSError, match=msg): HDFStore(path, mode=mode) else: @@ -52,7 +52,7 @@ def check(mode): # context if mode in ["r", "r+"]: - with pytest.raises(IOError, match=msg): + with pytest.raises(OSError, match=msg): with HDFStore(path, mode=mode) as store: pass else: @@ -63,7 +63,7 @@ def check(mode): # conv write if mode in ["r", "r+"]: - with pytest.raises(IOError, match=msg): + with pytest.raises(OSError, match=msg): df.to_hdf(path, "df", mode=mode) df.to_hdf(path, "df", mode="w") else: diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index b48d676cd0f8a..fc834c7acf39f 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -195,7 +195,7 @@ def test_iterator(self): (pd.read_csv, "os", FileNotFoundError, "csv"), (pd.read_fwf, "os", FileNotFoundError, "txt"), (pd.read_excel, "xlrd", FileNotFoundError, "xlsx"), - (pd.read_feather, "pyarrow", IOError, "feather"), + (pd.read_feather, "pyarrow", OSError, "feather"), (pd.read_hdf, "tables", FileNotFoundError, "h5"), (pd.read_stata, "os", FileNotFoundError, "dta"), (pd.read_sas, "os", FileNotFoundError, "sas7bdat"), @@ -234,7 +234,7 @@ def test_read_non_existent(self, reader, module, error_class, fn_ext): (pd.read_table, "os", FileNotFoundError, "csv"), (pd.read_fwf, "os", FileNotFoundError, "txt"), (pd.read_excel, "xlrd", FileNotFoundError, "xlsx"), - (pd.read_feather, "pyarrow", IOError, "feather"), + (pd.read_feather, "pyarrow", OSError, "feather"), (pd.read_hdf, "tables", FileNotFoundError, "h5"), (pd.read_stata, "os", FileNotFoundError, "dta"), (pd.read_sas, "os", FileNotFoundError, "sas7bdat"), From 4f019ad6278844e560477a32695213548eeee545 Mon Sep 17 00:00:00 2001 From: Mike Taves Date: Mon, 6 Sep 2021 15:28:51 +1200 Subject: [PATCH 2/4] BUG: use TypeError (not OSError) when read_csv expects file path name or file-like object --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/_libs/parsers.pyx | 4 ---- pandas/io/common.py | 4 ++++ pandas/tests/io/parser/common/test_common_basic.py | 8 ++++++++ 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 3b9ddf8138689..2a590af95d674 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -385,6 +385,7 @@ I/O - Column headers are dropped when constructing a :class:`DataFrame` from a sqlalchemy's ``Row`` object (:issue:`40682`) - Bug in unpickling a :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`) - Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`) +- Bug in :func:`read_csv`, changed exception class when expecting a file path name or file-like object from ``OSError`` to ``TypeError`` (:issue:`43366`) Period ^^^^^^ diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 0c3918044a374..5fe6818ff4b0e 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -606,10 +606,6 @@ cdef class TextReader: cdef: void *ptr - if not hasattr(source, "read"): - raise IOError(f'Expected file path name or file-like object, ' - f'got {type(source)} type') - ptr = new_rd_source(source) self.parser.source = ptr self.parser.cb_io = &buffer_rd_bytes diff --git a/pandas/io/common.py b/pandas/io/common.py index 4e97eaf8b953c..d517d386d558a 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -738,6 +738,10 @@ def get_handle( isinstance(ioargs.filepath_or_buffer, str) or ioargs.should_close ) + if "r" in ioargs.mode and not hasattr(handle, "read"): + raise TypeError('Expected file path name or file-like object, ' + f'got {type(ioargs.filepath_or_buffer)} type') + handles.reverse() # close the most recently added buffer first if ioargs.should_close: assert not isinstance(ioargs.filepath_or_buffer, str) diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 841df0ea7e470..243a293603af5 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -493,6 +493,14 @@ def test_raise_on_sep_with_delim_whitespace(all_parsers): parser.read_csv(StringIO(data), sep=r"\s", delim_whitespace=True) +def test_read_filepath_or_buffer(all_parsers): + # see gh-43366 + parser = all_parsers + + with pytest.raises(TypeError, match="Expected file path name or file-like"): + parser.read_csv(filepath_or_buffer=b'input') + + @xfail_pyarrow @pytest.mark.parametrize("delim_whitespace", [True, False]) def test_single_char_leading_whitespace(all_parsers, delim_whitespace): From 5f99a9b3ee49871efca86c4f5fa2cb4b53d64a65 Mon Sep 17 00:00:00 2001 From: Mike Taves Date: Thu, 9 Sep 2021 16:11:42 +1200 Subject: [PATCH 3/4] FIX: bytes -> BytesIO buffer in __init__ for BaseExcelReader & ExcelFile --- pandas/io/excel/_base.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 52d1e1c83d3e6..def02a6c9242c 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -404,6 +404,10 @@ def read_excel( class BaseExcelReader(metaclass=abc.ABCMeta): def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None): + # First argument can also be bytes, so create a buffer + if isinstance(filepath_or_buffer, bytes): + filepath_or_buffer = BytesIO(filepath_or_buffer) + self.handles = IOHandles( handle=filepath_or_buffer, compression={"method": None} ) @@ -422,8 +426,6 @@ def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None): except Exception: self.close() raise - elif isinstance(self.handles.handle, bytes): - self.book = self.load_workbook(BytesIO(self.handles.handle)) else: raise ValueError( "Must explicitly set engine if not passing in buffer or path for io." @@ -1111,7 +1113,7 @@ class ExcelFile: Parameters ---------- - path_or_buffer : str, path object (pathlib.Path or py._path.local.LocalPath), + path_or_buffer : str, bytes, path object (pathlib.Path or py._path.local.LocalPath), a file-like object, xlrd workbook or openpyxl workbook. If a string or path object, expected to be a path to a .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file. @@ -1170,6 +1172,10 @@ def __init__( if engine is not None and engine not in self._engines: raise ValueError(f"Unknown engine: {engine}") + # First argument can also be bytes, so create a buffer + if isinstance(path_or_buffer, bytes): + path_or_buffer = BytesIO(path_or_buffer) + # Could be a str, ExcelFile, Book, etc. self.io = path_or_buffer # Always a string From fed6c6996cb21c620db08ca7de13e0c0b640ffc1 Mon Sep 17 00:00:00 2001 From: Mike Taves Date: Sat, 11 Sep 2021 01:27:54 +0000 Subject: [PATCH 4/4] Fixes from pre-commit [automated commit] --- pandas/io/common.py | 6 ++++-- pandas/tests/io/parser/common/test_common_basic.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index d517d386d558a..a3aec4debdf44 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -739,8 +739,10 @@ def get_handle( ) if "r" in ioargs.mode and not hasattr(handle, "read"): - raise TypeError('Expected file path name or file-like object, ' - f'got {type(ioargs.filepath_or_buffer)} type') + raise TypeError( + "Expected file path name or file-like object, " + f"got {type(ioargs.filepath_or_buffer)} type" + ) handles.reverse() # close the most recently added buffer first if ioargs.should_close: diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 243a293603af5..635d848906fdb 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -498,7 +498,7 @@ def test_read_filepath_or_buffer(all_parsers): parser = all_parsers with pytest.raises(TypeError, match="Expected file path name or file-like"): - parser.read_csv(filepath_or_buffer=b'input') + parser.read_csv(filepath_or_buffer=b"input") @xfail_pyarrow