Skip to content

Commit 5f36af3

Browse files
authored
MAINT: rename IOError -> OSError (#43366)
* MAINT: rename IOError -> OSError
* BUG: use TypeError (not OSError) when read_csv expects file path name or file-like object
* FIX: bytes -> BytesIO buffer in __init__ for BaseExcelReader & ExcelFile
* Fixes from pre-commit [automated commit]
1 parent f72f566 commit 5f36af3

File tree

12 files changed

+45
-27
lines changed

12 files changed

+45
-27
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,7 @@ I/O
394394
- Column headers are dropped when constructing a :class:`DataFrame` from a sqlalchemy's ``Row`` object (:issue:`40682`)
395395
- Bug in unpickling a :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`)
396396
- Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`)
397+
- Bug in :func:`read_csv` raising ``OSError`` instead of ``TypeError`` when the input is neither a file path name nor a file-like object (:issue:`43366`)
397398

398399
Period
399400
^^^^^^

pandas/_libs/parsers.pyx

-4
Original file line numberDiff line numberDiff line change
@@ -606,10 +606,6 @@ cdef class TextReader:
606606
cdef:
607607
void *ptr
608608

609-
if not hasattr(source, "read"):
610-
raise IOError(f'Expected file path name or file-like object, '
611-
f'got {type(source)} type')
612-
613609
ptr = new_rd_source(source)
614610
self.parser.source = ptr
615611
self.parser.cb_io = &buffer_rd_bytes

pandas/_testing/_io.py

+8-7
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def _get_default_network_errors():
7070
# Lazy import for http.client because it imports many things from the stdlib
7171
import http.client
7272

73-
return (IOError, http.client.HTTPException, TimeoutError)
73+
return (OSError, http.client.HTTPException, TimeoutError)
7474

7575

7676
def optional_args(decorator):
@@ -135,7 +135,7 @@ def network(
135135
If True, checks connectivity before running the test case.
136136
error_classes : tuple or Exception
137137
error classes to ignore. If not in ``error_classes``, raises the error.
138-
defaults to IOError. Be careful about changing the error classes here.
138+
defaults to OSError. Be careful about changing the error classes here.
139139
skip_errnos : iterable of int
140140
Any exception that has .errno or .reason.errno set to one
141141
of these values will be skipped with an appropriate
@@ -165,19 +165,20 @@ def network(
165165
... def test_network():
166166
... with pd.io.common.urlopen("rabbit://bonanza.com"):
167167
... pass
168+
>>> test_network()
168169
Traceback
169170
...
170-
URLError: <urlopen error unknown url type: rabit>
171+
URLError: <urlopen error unknown url type: rabbit>
171172
172173
You can specify alternative URLs::
173174
174175
>>> @ts.network("https://www.yahoo.com")
175176
... def test_something_with_yahoo():
176-
... raise IOError("Failure Message")
177+
... raise OSError("Failure Message")
177178
>>> test_something_with_yahoo()
178179
Traceback (most recent call last):
179180
...
180-
IOError: Failure Message
181+
OSError: Failure Message
181182
182183
If you set check_before_test, it will check the url first and not run the
183184
test on failure::
@@ -241,7 +242,7 @@ def wrapper(*args, **kwargs):
241242

242243
def can_connect(url, error_classes=None):
243244
"""
244-
Try to connect to the given url. True if succeeds, False if IOError
245+
Try to connect to the given url. True if succeeds, False if OSError
245246
raised
246247
247248
Parameters
@@ -252,7 +253,7 @@ def can_connect(url, error_classes=None):
252253
Returns
253254
-------
254255
connectable : bool
255-
Return True if no IOError (unable to connect) or URLError (bad url) was
256+
Return True if no OSError (unable to connect) or URLError (bad url) was
256257
raised
257258
"""
258259
if error_classes is None:

pandas/io/common.py

+6
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,12 @@ def get_handle(
739739
isinstance(ioargs.filepath_or_buffer, str) or ioargs.should_close
740740
)
741741

742+
if "r" in ioargs.mode and not hasattr(handle, "read"):
743+
raise TypeError(
744+
"Expected file path name or file-like object, "
745+
f"got {type(ioargs.filepath_or_buffer)} type"
746+
)
747+
742748
handles.reverse() # close the most recently added buffer first
743749
if ioargs.should_close:
744750
assert not isinstance(ioargs.filepath_or_buffer, str)

pandas/io/excel/_base.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,10 @@ def read_excel(
408408

409409
class BaseExcelReader(metaclass=abc.ABCMeta):
410410
def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None):
411+
# First argument can also be bytes, so create a buffer
412+
if isinstance(filepath_or_buffer, bytes):
413+
filepath_or_buffer = BytesIO(filepath_or_buffer)
414+
411415
self.handles = IOHandles(
412416
handle=filepath_or_buffer, compression={"method": None}
413417
)
@@ -426,8 +430,6 @@ def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None):
426430
except Exception:
427431
self.close()
428432
raise
429-
elif isinstance(self.handles.handle, bytes):
430-
self.book = self.load_workbook(BytesIO(self.handles.handle))
431433
else:
432434
raise ValueError(
433435
"Must explicitly set engine if not passing in buffer or path for io."
@@ -1115,7 +1117,7 @@ class ExcelFile:
11151117
11161118
Parameters
11171119
----------
1118-
path_or_buffer : str, path object (pathlib.Path or py._path.local.LocalPath),
1120+
path_or_buffer : str, bytes, path object (pathlib.Path or py._path.local.LocalPath),
11191121
a file-like object, xlrd workbook or openpyxl workbook.
11201122
If a string or path object, expected to be a path to a
11211123
.xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file.
@@ -1174,6 +1176,10 @@ def __init__(
11741176
if engine is not None and engine not in self._engines:
11751177
raise ValueError(f"Unknown engine: {engine}")
11761178

1179+
# First argument can also be bytes, so create a buffer
1180+
if isinstance(path_or_buffer, bytes):
1181+
path_or_buffer = BytesIO(path_or_buffer)
1182+
11771183
# Could be a str, ExcelFile, Book, etc.
11781184
self.io = path_or_buffer
11791185
# Always a string

pandas/io/sql.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
from pandas.util.version import Version
4848

4949

50-
class DatabaseError(IOError):
50+
class DatabaseError(OSError):
5151
pass
5252

5353

pandas/tests/io/formats/test_console.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def test_detect_console_encoding_from_stdout_stdin(monkeypatch, empty, filled):
3939
assert detect_console_encoding() == filled
4040

4141

42-
@pytest.mark.parametrize("encoding", [AttributeError, IOError, "ascii"])
42+
@pytest.mark.parametrize("encoding", [AttributeError, OSError, "ascii"])
4343
def test_detect_console_encoding_fallback_to_locale(monkeypatch, encoding):
4444
# GH 21552
4545
with monkeypatch.context() as context:
@@ -55,8 +55,8 @@ def test_detect_console_encoding_fallback_to_locale(monkeypatch, encoding):
5555
["ascii", locale.Error],
5656
[AttributeError, "ascii"],
5757
[AttributeError, locale.Error],
58-
[IOError, "ascii"],
59-
[IOError, locale.Error],
58+
[OSError, "ascii"],
59+
[OSError, locale.Error],
6060
],
6161
)
6262
def test_detect_console_encoding_fallback_to_default(monkeypatch, std, locale):

pandas/tests/io/parser/common/test_common_basic.py

+8
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,14 @@ def test_raise_on_sep_with_delim_whitespace(all_parsers):
506506
parser.read_csv(StringIO(data), sep=r"\s", delim_whitespace=True)
507507

508508

509+
def test_read_filepath_or_buffer(all_parsers):
510+
# see gh-43366
511+
parser = all_parsers
512+
513+
with pytest.raises(TypeError, match="Expected file path name or file-like"):
514+
parser.read_csv(filepath_or_buffer=b"input")
515+
516+
509517
@xfail_pyarrow
510518
@pytest.mark.parametrize("delim_whitespace", [True, False])
511519
def test_single_char_leading_whitespace(all_parsers, delim_whitespace):

pandas/tests/io/parser/test_network.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -204,12 +204,12 @@ def test_parse_public_s3_bucket_nrows_python(self, tips_df, s3so):
204204

205205
def test_read_s3_fails(self, s3so):
206206
msg = "The specified bucket does not exist"
207-
with pytest.raises(IOError, match=msg):
207+
with pytest.raises(OSError, match=msg):
208208
read_csv("s3://nyqpug/asdf.csv", storage_options=s3so)
209209

210210
# Receive a permission error when trying to read a private bucket.
211211
# It's irrelevant here that this isn't actually a table.
212-
with pytest.raises(IOError, match=msg):
212+
with pytest.raises(OSError, match=msg):
213213
read_csv("s3://cant_get_it/file.csv")
214214

215215
@pytest.mark.xfail(reason="GH#39155 s3fs upgrade", strict=False)

pandas/tests/io/pytables/test_errors.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -214,15 +214,15 @@ def test_read_hdf_errors(setup_path):
214214

215215
with ensure_clean_path(setup_path) as path:
216216
msg = r"File [\S]* does not exist"
217-
with pytest.raises(IOError, match=msg):
217+
with pytest.raises(OSError, match=msg):
218218
read_hdf(path, "key")
219219

220220
df.to_hdf(path, "df")
221221
store = HDFStore(path, mode="r")
222222
store.close()
223223

224224
msg = "The HDFStore must be open for reading."
225-
with pytest.raises(IOError, match=msg):
225+
with pytest.raises(OSError, match=msg):
226226
read_hdf(store, "df")
227227

228228

pandas/tests/io/pytables/test_file_handling.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def check(mode):
4040

4141
# constructor
4242
if mode in ["r", "r+"]:
43-
with pytest.raises(IOError, match=msg):
43+
with pytest.raises(OSError, match=msg):
4444
HDFStore(path, mode=mode)
4545

4646
else:
@@ -52,7 +52,7 @@ def check(mode):
5252

5353
# context
5454
if mode in ["r", "r+"]:
55-
with pytest.raises(IOError, match=msg):
55+
with pytest.raises(OSError, match=msg):
5656
with HDFStore(path, mode=mode) as store:
5757
pass
5858
else:
@@ -63,7 +63,7 @@ def check(mode):
6363

6464
# conv write
6565
if mode in ["r", "r+"]:
66-
with pytest.raises(IOError, match=msg):
66+
with pytest.raises(OSError, match=msg):
6767
df.to_hdf(path, "df", mode=mode)
6868
df.to_hdf(path, "df", mode="w")
6969
else:

pandas/tests/io/test_common.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ def test_iterator(self):
195195
(pd.read_csv, "os", FileNotFoundError, "csv"),
196196
(pd.read_fwf, "os", FileNotFoundError, "txt"),
197197
(pd.read_excel, "xlrd", FileNotFoundError, "xlsx"),
198-
(pd.read_feather, "pyarrow", IOError, "feather"),
198+
(pd.read_feather, "pyarrow", OSError, "feather"),
199199
(pd.read_hdf, "tables", FileNotFoundError, "h5"),
200200
(pd.read_stata, "os", FileNotFoundError, "dta"),
201201
(pd.read_sas, "os", FileNotFoundError, "sas7bdat"),
@@ -234,7 +234,7 @@ def test_read_non_existent(self, reader, module, error_class, fn_ext):
234234
(pd.read_table, "os", FileNotFoundError, "csv"),
235235
(pd.read_fwf, "os", FileNotFoundError, "txt"),
236236
(pd.read_excel, "xlrd", FileNotFoundError, "xlsx"),
237-
(pd.read_feather, "pyarrow", IOError, "feather"),
237+
(pd.read_feather, "pyarrow", OSError, "feather"),
238238
(pd.read_hdf, "tables", FileNotFoundError, "h5"),
239239
(pd.read_stata, "os", FileNotFoundError, "dta"),
240240
(pd.read_sas, "os", FileNotFoundError, "sas7bdat"),

0 commit comments

Comments
 (0)