From 1ab776ee3cdefcac86be945b18e9f9750abfa8af Mon Sep 17 00:00:00 2001
From: Mike Taves <mwtoews@gmail.com>
Date: Mon, 6 Sep 2021 15:28:51 +1200
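Subject: [PATCH 1/4] MAINT: rename IOError -> OSError

IOError has been an alias of OSError since Python 3.3 (PEP 3151), so this
is purely a spelling change; no behavior is altered.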

---
 pandas/_testing/_io.py                         | 15 ++++++++-------
 pandas/io/sql.py                               |  2 +-
 pandas/tests/io/formats/test_console.py        |  6 +++---
 pandas/tests/io/parser/test_network.py         |  4 ++--
 pandas/tests/io/pytables/test_errors.py        |  4 ++--
 pandas/tests/io/pytables/test_file_handling.py |  6 +++---
 pandas/tests/io/test_common.py                 |  4 ++--
 7 files changed, 21 insertions(+), 20 deletions(-)
diff --git a/pandas/_testing/_io.py b/pandas/_testing/_io.py
index a0b6963cfac97..32224cdd4d04e 100644
--- a/pandas/_testing/_io.py
+++ b/pandas/_testing/_io.py
@@ -70,7 +70,7 @@ def _get_default_network_errors():
     # Lazy import for http.client because it imports many things from the stdlib
     import http.client
 
-    return (IOError, http.client.HTTPException, TimeoutError)
+    return (OSError, http.client.HTTPException, TimeoutError)
 
 
 def optional_args(decorator):
@@ -135,7 +135,7 @@ def network(
         If True, checks connectivity before running the test case.
     error_classes : tuple or Exception
         error classes to ignore. If not in ``error_classes``, raises the error.
-        defaults to IOError. Be careful about changing the error classes here.
+        defaults to OSError. Be careful about changing the error classes here.
     skip_errnos : iterable of int
         Any exception that has .errno or .reason.erno set to one
         of these values will be skipped with an appropriate
@@ -165,19 +165,20 @@ def network(
       ... def test_network():
       ...     with pd.io.common.urlopen("rabbit://bonanza.com"):
       ...         pass
+      >>> test_network()
       Traceback
          ...
-      URLError: <urlopen error unknown url type: rabit>
+      URLError: <urlopen error unknown url type: rabbit>
 
       You can specify alternative URLs::
 
         >>> @ts.network("https://www.yahoo.com")
         ... def test_something_with_yahoo():
-        ...    raise IOError("Failure Message")
+        ...    raise OSError("Failure Message")
         >>> test_something_with_yahoo()
         Traceback (most recent call last):
             ...
-        IOError: Failure Message
+        OSError: Failure Message
 
     If you set check_before_test, it will check the url first and not run the
     test on failure::
@@ -241,7 +242,7 @@ def wrapper(*args, **kwargs):
 
 def can_connect(url, error_classes=None):
     """
-    Try to connect to the given url. True if succeeds, False if IOError
+    Try to connect to the given url. Return True on success, False if OSError
     raised
 
     Parameters
@@ -252,7 +253,7 @@ def can_connect(url, error_classes=None):
     Returns
     -------
     connectable : bool
-        Return True if no IOError (unable to connect) or URLError (bad url) was
+        Return True if no OSError (unable to connect) or URLError (bad url) was
         raised
     """
     if error_classes is None:
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 45444852c99a6..ec5262ee3a04c 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -47,7 +47,7 @@
 from pandas.util.version import Version
 
 
-class DatabaseError(IOError):
+class DatabaseError(OSError):
     pass
 
 
diff --git a/pandas/tests/io/formats/test_console.py b/pandas/tests/io/formats/test_console.py
index b57a2393461a2..39674db6916c1 100644
--- a/pandas/tests/io/formats/test_console.py
+++ b/pandas/tests/io/formats/test_console.py
@@ -39,7 +39,7 @@ def test_detect_console_encoding_from_stdout_stdin(monkeypatch, empty, filled):
         assert detect_console_encoding() == filled
 
 
-@pytest.mark.parametrize("encoding", [AttributeError, IOError, "ascii"])
+@pytest.mark.parametrize("encoding", [AttributeError, OSError, "ascii"])
 def test_detect_console_encoding_fallback_to_locale(monkeypatch, encoding):
     # GH 21552
     with monkeypatch.context() as context:
@@ -55,8 +55,8 @@ def test_detect_console_encoding_fallback_to_locale(monkeypatch, encoding):
         ["ascii", locale.Error],
         [AttributeError, "ascii"],
         [AttributeError, locale.Error],
-        [IOError, "ascii"],
-        [IOError, locale.Error],
+        [OSError, "ascii"],
+        [OSError, locale.Error],
     ],
 )
 def test_detect_console_encoding_fallback_to_default(monkeypatch, std, locale):
diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py
index 497dd74d2a9a4..36f53bb1bb155 100644
--- a/pandas/tests/io/parser/test_network.py
+++ b/pandas/tests/io/parser/test_network.py
@@ -204,12 +204,12 @@ def test_parse_public_s3_bucket_nrows_python(self, tips_df, s3so):
 
     def test_read_s3_fails(self, s3so):
         msg = "The specified bucket does not exist"
-        with pytest.raises(IOError, match=msg):
+        with pytest.raises(OSError, match=msg):
             read_csv("s3://nyqpug/asdf.csv", storage_options=s3so)
 
         # Receive a permission error when trying to read a private bucket.
         # It's irrelevant here that this isn't actually a table.
-        with pytest.raises(IOError, match=msg):
+        with pytest.raises(OSError, match=msg):
             read_csv("s3://cant_get_it/file.csv")
 
     @pytest.mark.xfail(reason="GH#39155 s3fs upgrade", strict=False)
diff --git a/pandas/tests/io/pytables/test_errors.py b/pandas/tests/io/pytables/test_errors.py
index 2ae330e5139be..dbcd112a28aca 100644
--- a/pandas/tests/io/pytables/test_errors.py
+++ b/pandas/tests/io/pytables/test_errors.py
@@ -214,7 +214,7 @@ def test_read_hdf_errors(setup_path):
 
     with ensure_clean_path(setup_path) as path:
         msg = r"File [\S]* does not exist"
-        with pytest.raises(IOError, match=msg):
+        with pytest.raises(OSError, match=msg):
             read_hdf(path, "key")
 
         df.to_hdf(path, "df")
@@ -222,7 +222,7 @@ def test_read_hdf_errors(setup_path):
         store.close()
 
         msg = "The HDFStore must be open for reading."
-        with pytest.raises(IOError, match=msg):
+        with pytest.raises(OSError, match=msg):
             read_hdf(store, "df")
 
 
diff --git a/pandas/tests/io/pytables/test_file_handling.py b/pandas/tests/io/pytables/test_file_handling.py
index 88e2b5f080282..df0f7b0951b7d 100644
--- a/pandas/tests/io/pytables/test_file_handling.py
+++ b/pandas/tests/io/pytables/test_file_handling.py
@@ -40,7 +40,7 @@ def check(mode):
 
             # constructor
             if mode in ["r", "r+"]:
-                with pytest.raises(IOError, match=msg):
+                with pytest.raises(OSError, match=msg):
                     HDFStore(path, mode=mode)
 
             else:
@@ -52,7 +52,7 @@ def check(mode):
 
             # context
             if mode in ["r", "r+"]:
-                with pytest.raises(IOError, match=msg):
+                with pytest.raises(OSError, match=msg):
                     with HDFStore(path, mode=mode) as store:
                         pass
             else:
@@ -63,7 +63,7 @@ def check(mode):
 
             # conv write
             if mode in ["r", "r+"]:
-                with pytest.raises(IOError, match=msg):
+                with pytest.raises(OSError, match=msg):
                     df.to_hdf(path, "df", mode=mode)
                 df.to_hdf(path, "df", mode="w")
             else:
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index b48d676cd0f8a..fc834c7acf39f 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -195,7 +195,7 @@ def test_iterator(self):
             (pd.read_csv, "os", FileNotFoundError, "csv"),
             (pd.read_fwf, "os", FileNotFoundError, "txt"),
             (pd.read_excel, "xlrd", FileNotFoundError, "xlsx"),
-            (pd.read_feather, "pyarrow", IOError, "feather"),
+            (pd.read_feather, "pyarrow", OSError, "feather"),
             (pd.read_hdf, "tables", FileNotFoundError, "h5"),
             (pd.read_stata, "os", FileNotFoundError, "dta"),
             (pd.read_sas, "os", FileNotFoundError, "sas7bdat"),
@@ -234,7 +234,7 @@ def test_read_non_existent(self, reader, module, error_class, fn_ext):
             (pd.read_table, "os", FileNotFoundError, "csv"),
             (pd.read_fwf, "os", FileNotFoundError, "txt"),
             (pd.read_excel, "xlrd", FileNotFoundError, "xlsx"),
-            (pd.read_feather, "pyarrow", IOError, "feather"),
+            (pd.read_feather, "pyarrow", OSError, "feather"),
             (pd.read_hdf, "tables", FileNotFoundError, "h5"),
             (pd.read_stata, "os", FileNotFoundError, "dta"),
             (pd.read_sas, "os", FileNotFoundError, "sas7bdat"),

From 4f019ad6278844e560477a32695213548eeee545 Mon Sep 17 00:00:00 2001
From: Mike Taves <mwtoews@gmail.com>
Date: Mon, 6 Sep 2021 15:28:51 +1200
Subject: [PATCH 2/4] BUG: use TypeError (not OSError) when read_csv expects
 file path name or file-like object

---
 doc/source/whatsnew/v1.4.0.rst                     | 1 +
 pandas/_libs/parsers.pyx                           | 4 ----
 pandas/io/common.py                                | 4 ++++
 pandas/tests/io/parser/common/test_common_basic.py | 8 ++++++++
 4 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 3b9ddf8138689..2a590af95d674 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -385,6 +385,7 @@ I/O
 - Column headers are dropped when constructing a :class:`DataFrame` from a sqlalchemy's ``Row`` object (:issue:`40682`)
 - Bug in unpickling a :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`)
 - Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`)
+- Bug in :func:`read_csv` raising ``OSError`` instead of ``TypeError`` when the input is not a file path name or file-like object (:issue:`43366`)
 
 Period
 ^^^^^^
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 0c3918044a374..5fe6818ff4b0e 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -606,10 +606,6 @@ cdef class TextReader:
         cdef:
             void *ptr
 
-        if not hasattr(source, "read"):
-            raise IOError(f'Expected file path name or file-like object, '
-                          f'got {type(source)} type')
-
         ptr = new_rd_source(source)
         self.parser.source = ptr
         self.parser.cb_io = &buffer_rd_bytes
diff --git a/pandas/io/common.py b/pandas/io/common.py
index 4e97eaf8b953c..d517d386d558a 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -738,6 +738,10 @@ def get_handle(
             isinstance(ioargs.filepath_or_buffer, str) or ioargs.should_close
         )
 
+    if "r" in ioargs.mode and not hasattr(handle, "read"):
+        raise TypeError('Expected file path name or file-like object, '
+                        f'got {type(ioargs.filepath_or_buffer)} type')
+
     handles.reverse()  # close the most recently added buffer first
     if ioargs.should_close:
         assert not isinstance(ioargs.filepath_or_buffer, str)
diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py
index 841df0ea7e470..243a293603af5 100644
--- a/pandas/tests/io/parser/common/test_common_basic.py
+++ b/pandas/tests/io/parser/common/test_common_basic.py
@@ -493,6 +493,14 @@ def test_raise_on_sep_with_delim_whitespace(all_parsers):
         parser.read_csv(StringIO(data), sep=r"\s", delim_whitespace=True)
 
 
+def test_read_filepath_or_buffer(all_parsers):
+    # see gh-43366
+    parser = all_parsers
+
+    with pytest.raises(TypeError, match="Expected file path name or file-like"):
+        parser.read_csv(filepath_or_buffer=b'input')
+
+
 @xfail_pyarrow
 @pytest.mark.parametrize("delim_whitespace", [True, False])
 def test_single_char_leading_whitespace(all_parsers, delim_whitespace):

From 5f99a9b3ee49871efca86c4f5fa2cb4b53d64a65 Mon Sep 17 00:00:00 2001
From: Mike Taves <mwtoews@gmail.com>
Date: Thu, 9 Sep 2021 16:11:42 +1200
Subject: [PATCH 3/4] FIX: bytes -> BytesIO buffer in __init__ for
 BaseExcelReader & ExcelFile

---
 pandas/io/excel/_base.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 52d1e1c83d3e6..def02a6c9242c 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -404,6 +404,10 @@ def read_excel(
 
 class BaseExcelReader(metaclass=abc.ABCMeta):
     def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None):
+        # First argument can also be bytes, so create a buffer
+        if isinstance(filepath_or_buffer, bytes):
+            filepath_or_buffer = BytesIO(filepath_or_buffer)
+
         self.handles = IOHandles(
             handle=filepath_or_buffer, compression={"method": None}
         )
@@ -422,8 +426,6 @@ def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None):
             except Exception:
                 self.close()
                 raise
-        elif isinstance(self.handles.handle, bytes):
-            self.book = self.load_workbook(BytesIO(self.handles.handle))
         else:
             raise ValueError(
                 "Must explicitly set engine if not passing in buffer or path for io."
@@ -1111,7 +1113,7 @@ class ExcelFile:
 
     Parameters
     ----------
-    path_or_buffer : str, path object (pathlib.Path or py._path.local.LocalPath),
+    path_or_buffer : str, bytes, path object (pathlib.Path or py._path.local.LocalPath),
         a file-like object, xlrd workbook or openpyxl workbook.
         If a string or path object, expected to be a path to a
         .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file.
@@ -1170,6 +1172,10 @@ def __init__(
         if engine is not None and engine not in self._engines:
             raise ValueError(f"Unknown engine: {engine}")
 
+        # First argument can also be bytes, so create a buffer
+        if isinstance(path_or_buffer, bytes):
+            path_or_buffer = BytesIO(path_or_buffer)
+
         # Could be a str, ExcelFile, Book, etc.
         self.io = path_or_buffer
         # Always a string

From fed6c6996cb21c620db08ca7de13e0c0b640ffc1 Mon Sep 17 00:00:00 2001
From: Mike Taves <mwtoews@gmail.com>
Date: Sat, 11 Sep 2021 01:27:54 +0000
Subject: [PATCH 4/4] Fixes from pre-commit [automated commit]

---
 pandas/io/common.py                                | 6 ++++--
 pandas/tests/io/parser/common/test_common_basic.py | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index d517d386d558a..a3aec4debdf44 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -739,8 +739,10 @@ def get_handle(
         )
 
     if "r" in ioargs.mode and not hasattr(handle, "read"):
-        raise TypeError('Expected file path name or file-like object, '
-                        f'got {type(ioargs.filepath_or_buffer)} type')
+        raise TypeError(
+            "Expected file path name or file-like object, "
+            f"got {type(ioargs.filepath_or_buffer)} type"
+        )
 
     handles.reverse()  # close the most recently added buffer first
     if ioargs.should_close:
diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py
index 243a293603af5..635d848906fdb 100644
--- a/pandas/tests/io/parser/common/test_common_basic.py
+++ b/pandas/tests/io/parser/common/test_common_basic.py
@@ -498,7 +498,7 @@ def test_read_filepath_or_buffer(all_parsers):
     parser = all_parsers
 
     with pytest.raises(TypeError, match="Expected file path name or file-like"):
-        parser.read_csv(filepath_or_buffer=b'input')
+        parser.read_csv(filepath_or_buffer=b"input")
 
 
 @xfail_pyarrow