diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 826c5a795f886..3566d58f5c641 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1599,6 +1599,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
 - :func:`read_sas()` will parse numbers in sas7bdat-files that have width less than 8 bytes correctly. (:issue:`21616`)
 - :func:`read_sas()` will correctly parse sas7bdat files with many columns (:issue:`22628`)
 - :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`)
+- Bug in :func:`read_sas()` in which an incorrect error was raised on an invalid file format. (:issue:`24548`)
 - Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`)
 - Bug in :func:`to_html()` with ``index=False`` misses truncation indicators (...) on truncated DataFrame (:issue:`15019`, :issue:`22783`)
 - Bug in :func:`to_html()` with ``index=False`` when both columns and row index are ``MultiIndex`` (:issue:`22579`)
diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py
index 2da3775d5a6a7..9fae0da670bec 100644
--- a/pandas/io/sas/sasreader.py
+++ b/pandas/io/sas/sasreader.py
@@ -16,8 +16,8 @@ def read_sas(filepath_or_buffer, format=None, index=None, encoding=None,
     filepath_or_buffer : string or file-like object
         Path to the SAS file.
     format : string {'xport', 'sas7bdat'} or None
-        If None, file format is inferred. If 'xport' or 'sas7bdat',
-        uses the corresponding format.
+        If None, file format is inferred from file extension. If 'xport' or
+        'sas7bdat', uses the corresponding format.
     index : identifier of index column, defaults to None
         Identifier of column that should be used as index of the DataFrame.
     encoding : string, default is None
@@ -39,16 +39,13 @@ def read_sas(filepath_or_buffer, format=None, index=None, encoding=None,
         filepath_or_buffer = _stringify_path(filepath_or_buffer)
         if not isinstance(filepath_or_buffer, compat.string_types):
             raise ValueError(buffer_error_msg)
-        try:
-            fname = filepath_or_buffer.lower()
-            if fname.endswith(".xpt"):
-                format = "xport"
-            elif fname.endswith(".sas7bdat"):
-                format = "sas7bdat"
-            else:
-                raise ValueError("unable to infer format of SAS file")
-        except ValueError:
-            pass
+        fname = filepath_or_buffer.lower()
+        if fname.endswith(".xpt"):
+            format = "xport"
+        elif fname.endswith(".sas7bdat"):
+            format = "sas7bdat"
+        else:
+            raise ValueError("unable to infer format of SAS file")
 
     if format.lower() == 'xport':
         from pandas.io.sas.sas_xport import XportReader
diff --git a/pandas/tests/io/sas/test_sas.py b/pandas/tests/io/sas/test_sas.py
index 0f6342aa62ac0..34bca1e5b74a1 100644
--- a/pandas/tests/io/sas/test_sas.py
+++ b/pandas/tests/io/sas/test_sas.py
@@ -3,6 +3,7 @@
 from pandas.compat import StringIO
 
 from pandas import read_sas
+import pandas.util.testing as tm
 
 
 class TestSas(object):
@@ -15,3 +16,10 @@ def test_sas_buffer_format(self):
                "name, you must specify a format string")
         with pytest.raises(ValueError, match=msg):
             read_sas(b)
+
+    def test_sas_read_no_format_or_extension(self):
+        # see gh-24548
+        msg = ("unable to infer format of SAS file")
+        with tm.ensure_clean('test_file_no_extension') as path:
+            with pytest.raises(ValueError, match=msg):
+                read_sas(path)