From bbd9e99ccfbda0e05ce4d1c471e2c6e30b1c2a59 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= <twoertwein@gmail.com>
Date: Thu, 16 Sep 2021 20:15:56 -0400
Subject: [PATCH 1/2] CLN: let `codecs` validate the possible values of
 encoding errors

---
 pandas/io/common.py            | 22 ++++------------------
 pandas/tests/io/test_common.py |  2 +-
 2 files changed, 5 insertions(+), 19 deletions(-)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index 46be1f9bb09b2..fcb9669e09ee3 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -602,25 +602,11 @@ def get_handle(
     if _is_binary_mode(path_or_buf, mode) and "b" not in mode:
         mode += "b"
 
-    # valdiate errors
+    # validate encoding and errors
+    if isinstance(encoding, str):
+        codecs.lookup(encoding)
     if isinstance(errors, str):
-        errors = errors.lower()
-    if errors not in (
-        None,
-        "strict",
-        "ignore",
-        "replace",
-        "xmlcharrefreplace",
-        "backslashreplace",
-        "namereplace",
-        "surrogateescape",
-        "surrogatepass",
-    ):
-        raise ValueError(
-            f"Invalid value for `encoding_errors` ({errors}). Please see "
-            + "https://docs.python.org/3/library/codecs.html#error-handlers "
-            + "for valid values."
-        )
+        codecs.lookup_error(errors)
 
     # open URLs
     ioargs = _get_filepath_or_buffer(
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index ad0b25d26d6f6..699459ab3666d 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -593,7 +593,7 @@ def test_encoding_errors(encoding_errors, format):
 def test_bad_encdoing_errors():
     # GH 39777
     with tm.ensure_clean() as path:
-        with pytest.raises(ValueError, match="Invalid value for `encoding_errors`"):
+        with pytest.raises(LookupError, match="unknown error handler name"):
             icom.get_handle(path, "w", errors="bad")
 
 

From 1fa9c26a46f75acbf8f9c19095d6274350c4e18d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= <twoertwein@gmail.com>
Date: Thu, 16 Sep 2021 22:06:50 -0400
Subject: [PATCH 2/2] stricter parsing of encoding

---
 pandas/io/common.py             | 4 ----
 pandas/tests/io/xml/test_xml.py | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index fcb9669e09ee3..ba1cc82bfea56 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -276,10 +276,6 @@ def _get_filepath_or_buffer(
 
     compression = dict(compression, method=compression_method)
 
-    # uniform encoding names
-    if encoding is not None:
-        encoding = encoding.replace("_", "-").lower()
-
     # bz2 and xz do not write the byte order mark for utf-16 and utf-32
     # print a warning when writing such files
     if (
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index 7e9a03c2a59a8..a99f66336bf22 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -709,7 +709,7 @@ def test_utf16_encoding(datapath, parser):
 
 def test_unknown_encoding(datapath, parser):
     filename = datapath("io", "data", "xml", "baby_names.xml")
-    with pytest.raises(LookupError, match=("unknown encoding: uft-8")):
+    with pytest.raises(LookupError, match=("unknown encoding: UFT-8")):
         read_xml(filename, encoding="UFT-8", parser=parser)