# Patch summary (commentary only; patch tools skip text before the first
# "diff --git" header).
#
# pandas/io/common.py
#   * _get_filepath_or_buffer: stops rewriting the caller's encoding name
#     (previously "_" was replaced by "-" and the name was lower-cased).
#   * get_handle: string `encoding` / `errors` values are now validated with
#     codecs.lookup() / codecs.lookup_error() instead of lower-casing
#     `errors` and checking it against a hard-coded tuple that raised
#     ValueError.
# pandas/tests/io/test_common.py, pandas/tests/io/xml/test_xml.py
#   * Expectations updated: an invalid `errors` value now surfaces as
#     LookupError ("unknown error handler name"), and the unknown-encoding
#     message keeps the caller's original casing ("UFT-8").
#
# NOTE(review): no hunk below adds `import codecs`; confirm the module is
#   already imported in pandas/io/common.py.
# NOTE(review): callers that caught ValueError for a bad `errors` value will
#   now see LookupError (see the test change) -- confirm this is intended.
# NOTE(review): the bz2/xz byte-order-mark check that follows the removed
#   normalization is only partly visible here; presumably it compares
#   `encoding` against normalized names -- verify it still matches aliases
#   such as "UTF_16".
# NOTE(review): line breaks and indentation were reconstructed from a
#   whitespace-collapsed copy of this patch (blank context lines placed to
#   match the @@ line counts); verify against the target tree before applying.
diff --git a/pandas/io/common.py b/pandas/io/common.py
index 46be1f9bb09b2..ba1cc82bfea56 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -276,10 +276,6 @@ def _get_filepath_or_buffer(
 
     compression = dict(compression, method=compression_method)
 
-    # uniform encoding names
-    if encoding is not None:
-        encoding = encoding.replace("_", "-").lower()
-
     # bz2 and xz do not write the byte order mark for utf-16 and utf-32
     # print a warning when writing such files
     if (
@@ -602,25 +598,11 @@ def get_handle(
     if _is_binary_mode(path_or_buf, mode) and "b" not in mode:
         mode += "b"
 
-    # valdiate errors
+    # validate encoding and errors
+    if isinstance(encoding, str):
+        codecs.lookup(encoding)
     if isinstance(errors, str):
-        errors = errors.lower()
-    if errors not in (
-        None,
-        "strict",
-        "ignore",
-        "replace",
-        "xmlcharrefreplace",
-        "backslashreplace",
-        "namereplace",
-        "surrogateescape",
-        "surrogatepass",
-    ):
-        raise ValueError(
-            f"Invalid value for `encoding_errors` ({errors}). Please see "
-            + "https://docs.python.org/3/library/codecs.html#error-handlers "
-            + "for valid values."
-        )
+        codecs.lookup_error(errors)
 
     # open URLs
     ioargs = _get_filepath_or_buffer(
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index ad0b25d26d6f6..699459ab3666d 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -593,7 +593,7 @@ def test_encoding_errors(encoding_errors, format):
 def test_bad_encdoing_errors():
     # GH 39777
     with tm.ensure_clean() as path:
-        with pytest.raises(ValueError, match="Invalid value for `encoding_errors`"):
+        with pytest.raises(LookupError, match="unknown error handler name"):
             icom.get_handle(path, "w", errors="bad")
 
 
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index 7e9a03c2a59a8..a99f66336bf22 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -709,7 +709,7 @@ def test_utf16_encoding(datapath, parser):
 
 def test_unknown_encoding(datapath, parser):
     filename = datapath("io", "data", "xml", "baby_names.xml")
-    with pytest.raises(LookupError, match=("unknown encoding: uft-8")):
+    with pytest.raises(LookupError, match=("unknown encoding: UFT-8")):
         read_xml(filename, encoding="UFT-8", parser=parser)
 
 