From e19455d2cb2ce250ad2b69c99f9c0217d9669c9f Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 16 Feb 2025 07:54:01 -0500
Subject: [PATCH 1/7] ENH: Add dtype argument to str.decode

---
 doc/source/whatsnew/v2.3.0.rst       |  1 +
 pandas/core/strings/accessor.py      | 16 ++++++++++++++--
 pandas/tests/strings/test_strings.py | 21 +++++++++++++++++++++
 3 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst
index 8bdddb5b7f85d..ea00d5df7f291 100644
--- a/doc/source/whatsnew/v2.3.0.rst
+++ b/doc/source/whatsnew/v2.3.0.rst
@@ -39,6 +39,7 @@ Other enhancements
 - :meth:`~Series.to_hdf` and :meth:`~DataFrame.to_hdf` now round-trip with ``StringDtype``  (:issue:`60663`)
 - The :meth:`~Series.cumsum`, :meth:`~Series.cummin`, and :meth:`~Series.cummax` reductions are now implemented for ``StringDtype`` columns when backed by PyArrow (:issue:`60633`)
 - The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`)
+- The :meth:`Series.str.decode` has gained the argument ``dtype`` to control the dtype of the result (:issue:`???`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_230.notable_bug_fixes:
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index b854338c2d1d7..0840bd61fe8d0 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -34,6 +34,7 @@
     is_numeric_dtype,
     is_object_dtype,
     is_re,
+    is_string_dtype,
 )
 from pandas.core.dtypes.dtypes import (
     ArrowDtype,
@@ -2102,7 +2103,7 @@ def slice_replace(self, start=None, stop=None, repl=None):
         result = self._data.array._str_slice_replace(start, stop, repl)
         return self._wrap_result(result)
 
-    def decode(self, encoding, errors: str = "strict"):
+    def decode(self, encoding, errors: str = "strict", dtype: str | DtypeObj = None):
         """
         Decode character string in the Series/Index using indicated encoding.
 
@@ -2116,6 +2117,10 @@ def decode(self, encoding, errors: str = "strict"):
         errors : str, optional
             Specifies the error handling scheme.
             Possible values are those supported by :meth:`bytes.decode`.
+        dtype : str or dtype, optional
+            The dtype of the result. When not ``None``, must be either a string or
+            object dtype. When ``None``, the dtype of the result is determined by
+            ``pd.options.future.infer_string``.
 
         Returns
         -------
@@ -2137,6 +2142,12 @@ def decode(self, encoding, errors: str = "strict"):
         2   ()
         dtype: object
         """
+        if (
+            dtype is not None
+            and not is_string_dtype(dtype)
+            and not is_object_dtype(dtype)
+        ):
+            raise ValueError(f"dtype must be string or object, got {dtype=}")
         # TODO: Add a similar _bytes interface.
         if encoding in _cpython_optimized_decoders:
             # CPython optimized implementation
@@ -2146,7 +2157,8 @@ def decode(self, encoding, errors: str = "strict"):
             f = lambda x: decoder(x, errors)[0]
         arr = self._data.array
         result = arr._str_map(f)
-        dtype = "str" if get_option("future.infer_string") else None
+        if dtype is None:
+            dtype = "str" if get_option("future.infer_string") else None
         return self._wrap_result(result, dtype=dtype)
 
     @forbid_nonstring_types(["bytes"])
diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py
index ee531b32aa82d..58cccbdcc45ea 100644
--- a/pandas/tests/strings/test_strings.py
+++ b/pandas/tests/strings/test_strings.py
@@ -601,6 +601,27 @@ def test_decode_errors_kwarg():
     tm.assert_series_equal(result, expected)
 
 
+def test_decode_string_dtype(string_dtype):
+    ser = Series([b"a", b"b"])
+    result = ser.str.decode("utf-8", dtype=string_dtype)
+    expected = Series(["a", "b"], dtype=string_dtype)
+    tm.assert_series_equal(result, expected)
+
+
+def test_decode_object_dtype(object_dtype):
+    ser = Series([b"a", rb"\ud800"])
+    result = ser.str.decode("utf-8", dtype=object_dtype)
+    expected = Series(["a", r"\ud800"], dtype=object_dtype)
+    tm.assert_series_equal(result, expected)
+
+
+def test_decode_bad_dtype():
+    ser = Series([b"a", b"b"])
+    msg = "dtype must be string or object, got dtype='int64'"
+    with pytest.raises(ValueError, match=msg):
+        ser.str.decode("utf-8", dtype="int64")
+
+
 @pytest.mark.parametrize(
     "form, expected",
     [

From d37469fb38011380576618ecfff3be00b02f0f73 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 16 Feb 2025 07:56:30 -0500
Subject: [PATCH 2/7] DOC/TST: Fill in issue number, add versionadded and PR references

---
 doc/source/whatsnew/v2.3.0.rst       | 2 +-
 pandas/core/strings/accessor.py      | 2 ++
 pandas/tests/strings/test_strings.py | 3 +++
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst
index ea00d5df7f291..42beb9080b66f 100644
--- a/doc/source/whatsnew/v2.3.0.rst
+++ b/doc/source/whatsnew/v2.3.0.rst
@@ -37,9 +37,9 @@ Other enhancements
   updated to work correctly with NumPy >= 2 (:issue:`57739`)
 - :meth:`Series.str.decode` result now has ``StringDtype`` when ``future.infer_string`` is True (:issue:`60709`)
 - :meth:`~Series.to_hdf` and :meth:`~DataFrame.to_hdf` now round-trip with ``StringDtype``  (:issue:`60663`)
+- The :meth:`Series.str.decode` method has gained the argument ``dtype`` to control the dtype of the result (:issue:`60940`)
 - The :meth:`~Series.cumsum`, :meth:`~Series.cummin`, and :meth:`~Series.cummax` reductions are now implemented for ``StringDtype`` columns when backed by PyArrow (:issue:`60633`)
 - The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`)
-- The :meth:`Series.str.decode` has gained the argument ``dtype`` to control the dtype of the result (:issue:`???`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_230.notable_bug_fixes:
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 0840bd61fe8d0..94731d47032d4 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -2122,6 +2122,8 @@ def decode(self, encoding, errors: str = "strict", dtype: str | DtypeObj = None)
             object dtype. When ``None``, the dtype of the result is determined by
             ``pd.options.future.infer_string``.
 
+            .. versionadded:: 2.3.0
+
         Returns
         -------
         Series or Index
diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py
index 58cccbdcc45ea..025f837982595 100644
--- a/pandas/tests/strings/test_strings.py
+++ b/pandas/tests/strings/test_strings.py
@@ -602,6 +602,7 @@ def test_decode_errors_kwarg():
 
 
 def test_decode_string_dtype(string_dtype):
+    # https://github.com/pandas-dev/pandas/pull/60940
     ser = Series([b"a", b"b"])
     result = ser.str.decode("utf-8", dtype=string_dtype)
     expected = Series(["a", "b"], dtype=string_dtype)
@@ -609,6 +610,7 @@ def test_decode_string_dtype(string_dtype):
 
 
 def test_decode_object_dtype(object_dtype):
+    # https://github.com/pandas-dev/pandas/pull/60940
     ser = Series([b"a", rb"\ud800"])
     result = ser.str.decode("utf-8", dtype=object_dtype)
     expected = Series(["a", r"\ud800"], dtype=object_dtype)
@@ -616,6 +618,7 @@ def test_decode_object_dtype(object_dtype):
 
 
 def test_decode_bad_dtype():
+    # https://github.com/pandas-dev/pandas/pull/60940
     ser = Series([b"a", b"b"])
     msg = "dtype must be string or object, got dtype='int64'"
     with pytest.raises(ValueError, match=msg):

From 797f99c53f4c07f06460614e3e0197b8552d9031 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 16 Feb 2025 07:59:27 -0500
Subject: [PATCH 3/7] CLN: Simplify default dtype fallback in str.decode

---
 pandas/core/strings/accessor.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 94731d47032d4..955638984aab9 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -2159,8 +2159,8 @@ def decode(self, encoding, errors: str = "strict", dtype: str | DtypeObj = None)
             f = lambda x: decoder(x, errors)[0]
         arr = self._data.array
         result = arr._str_map(f)
-        if dtype is None:
-            dtype = "str" if get_option("future.infer_string") else None
+        if dtype is None and get_option("future.infer_string"):
+            dtype = "str"
         return self._wrap_result(result, dtype=dtype)
 
     @forbid_nonstring_types(["bytes"])

From 6cd5f0207e3bba0e491c5c5780310d3a33c10de4 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 16 Feb 2025 07:59:55 -0500
Subject: [PATCH 4/7] CLN: Resolve default dtype before decoding in str.decode

---
 pandas/core/strings/accessor.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 955638984aab9..87ef62e1aeab4 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -2150,6 +2150,8 @@ def decode(self, encoding, errors: str = "strict", dtype: str | DtypeObj = None)
             and not is_object_dtype(dtype)
         ):
             raise ValueError(f"dtype must be string or object, got {dtype=}")
+        if dtype is None and get_option("future.infer_string"):
+            dtype = "str"
         # TODO: Add a similar _bytes interface.
         if encoding in _cpython_optimized_decoders:
             # CPython optimized implementation
@@ -2159,8 +2161,6 @@ def decode(self, encoding, errors: str = "strict", dtype: str | DtypeObj = None)
             f = lambda x: decoder(x, errors)[0]
         arr = self._data.array
         result = arr._str_map(f)
-        if dtype is None and get_option("future.infer_string"):
-            dtype = "str"
         return self._wrap_result(result, dtype=dtype)
 
     @forbid_nonstring_types(["bytes"])

From 5a836bb6deb1777488b73f263001c627713b3cfa Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 16 Feb 2025 08:23:00 -0500
Subject: [PATCH 5/7] type-hint fixup

---
 pandas/core/strings/accessor.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 87ef62e1aeab4..6fb7207f45ea8 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -2103,7 +2103,9 @@ def slice_replace(self, start=None, stop=None, repl=None):
         result = self._data.array._str_slice_replace(start, stop, repl)
         return self._wrap_result(result)
 
-    def decode(self, encoding, errors: str = "strict", dtype: str | DtypeObj = None):
+    def decode(
+        self, encoding, errors: str = "strict", dtype: str | DtypeObj | None = None
+    ):
         """
         Decode character string in the Series/Index using indicated encoding.
 

From ee2d377dba883564e2d30a611b5893b663b0a3d2 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com>
Date: Mon, 17 Feb 2025 16:18:09 -0500
Subject: [PATCH 6/7] Simplify condition

---
 pandas/core/strings/accessor.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 6fb7207f45ea8..449a492557fa2 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -2149,7 +2149,6 @@ def decode(
         if (
             dtype is not None
             and not is_string_dtype(dtype)
-            and not is_object_dtype(dtype)
         ):
             raise ValueError(f"dtype must be string or object, got {dtype=}")
         if dtype is None and get_option("future.infer_string"):

From 91d6be373598e2c9dde1ed548ad7e35ffdbbec55 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Mon, 17 Feb 2025 17:32:24 -0500
Subject: [PATCH 7/7] lint

---
 pandas/core/strings/accessor.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 449a492557fa2..75fbd642c3520 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -2146,10 +2146,7 @@ def decode(
         2   ()
         dtype: object
         """
-        if (
-            dtype is not None
-            and not is_string_dtype(dtype)
-        ):
+        if dtype is not None and not is_string_dtype(dtype):
             raise ValueError(f"dtype must be string or object, got {dtype=}")
         if dtype is None and get_option("future.infer_string"):
             dtype = "str"