From 910ed9be46891881eb87c3c3c556f2120e350d58 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Mon, 4 Dec 2023 13:55:36 +0000
Subject: [PATCH 1/8] wip

---
 pandas/core/dtypes/missing.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
index a635ac77566e1..94c65d17bf8b8 100644
--- a/pandas/core/dtypes/missing.py
+++ b/pandas/core/dtypes/missing.py
@@ -643,7 +643,8 @@ def infer_fill_value(val):
             return np.array("NaT", dtype=TD64NS_DTYPE)
         return np.array(np.nan, dtype=object)
     elif val.dtype.kind == "U":
-        return np.array(np.nan, dtype=val.dtype)
+        return np.array(np.nan, dtype=object)
+        # return np.array(np.nan, dtype=val.dtype)
     return np.nan
 
 

From b3774b18e1877267b2d51a2f0947f281f90d15ee Mon Sep 17 00:00:00 2001
From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Mon, 4 Dec 2023 14:33:13 +0000
Subject: [PATCH 2/8] Fixed bug when creating new column with missing values
 when setting a single string value

---
 doc/source/whatsnew/v2.1.4.rst               |  2 +-
 pandas/core/dtypes/missing.py                | 11 +++++++---
 pandas/core/indexing.py                      |  4 +++-
 pandas/tests/frame/indexing/test_indexing.py | 22 ++++++++++++++++++++
 4 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.4.rst b/doc/source/whatsnew/v2.1.4.rst
index 9e3eb90436642..6c3a4f33ceb22 100644
--- a/doc/source/whatsnew/v2.1.4.rst
+++ b/doc/source/whatsnew/v2.1.4.rst
@@ -13,7 +13,7 @@ including other versions of pandas.
 
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
--
+- Fixed bug when creating new column with missing values when setting a single string value (:issue:`56204`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
index 94c65d17bf8b8..a82305ad20f7b 100644
--- a/pandas/core/dtypes/missing.py
+++ b/pandas/core/dtypes/missing.py
@@ -624,7 +624,7 @@ def array_equals(left: ArrayLike, right: ArrayLike) -> bool:
         return array_equivalent(left, right, dtype_equal=True)
 
 
-def infer_fill_value(val):
+def infer_fill_value(val, index: Index = None):
     """
     infer the fill value for the nan/NaT from the provided
     scalar/ndarray/list-like if we are a NaT, return the correct dtyped
@@ -643,8 +643,13 @@ def infer_fill_value(val):
             return np.array("NaT", dtype=TD64NS_DTYPE)
         return np.array(np.nan, dtype=object)
     elif val.dtype.kind == "U":
-        return np.array(np.nan, dtype=object)
-        # return np.array(np.nan, dtype=val.dtype)
+        if get_option("future.infer_string"):
+            from pandas import Series
+
+            return Series(
+                [np.nan] * len(index), dtype="string[pyarrow_numpy]", index=index
+            )
+        return None
     return np.nan
 
 
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index e3928621a4e48..78cece7522406 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1879,7 +1879,9 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc"):
 
                         else:
                             # FIXME: GH#42099#issuecomment-864326014
-                            self.obj[key] = infer_fill_value(value)
+                            self.obj[key] = infer_fill_value(
+                                value, index=self.obj.index
+                            )
 
                         new_indexer = convert_from_missing_indexer_tuple(
                             indexer, self.obj.axes
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index dfb4a3092789a..a296c75784dc0 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -1922,6 +1922,28 @@ def test_adding_new_conditional_column() -> None:
     tm.assert_frame_equal(df, expected)
 
 
+def test_adding_new_conditional_column_with_string() -> None:
+    # https://github.com/pandas-dev/pandas/issues/56204
+    df = DataFrame({"a": [1, 2], "b": [3, 4]})
+    df.loc[lambda x: x.a == 1, "c"] = "1"
+    expected = DataFrame({"a": [1, 2], "b": [3, 4], "c": ["1", None]}).astype(
+        {"a": "int64", "b": "int64", "c": "object"}
+    )
+    tm.assert_frame_equal(df, expected)
+
+
+def test_adding_new_conditional_column_with_infer_string() -> None:
+    # https://github.com/pandas-dev/pandas/issues/56204
+    pytest.importorskip("pyarrow")
+    df = DataFrame({"a": [1, 2], "b": [3, 4]})
+    with pd.option_context("future.infer_string", True):
+        df.loc[lambda x: x.a == 1, "c"] = "1"
+    expected = DataFrame({"a": [1, 2], "b": [3, 4], "c": ["1", None]}).astype(
+        {"a": "int64", "b": "int64", "c": "string[pyarrow_numpy]"}
+    )
+    tm.assert_frame_equal(df, expected)
+
+
 def test_add_new_column_infer_string():
     # GH#55366
     pytest.importorskip("pyarrow")

From 5ab3d09dde51e65ccd23fd3169679a67b2b12eac Mon Sep 17 00:00:00 2001
From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Mon, 4 Dec 2023 15:13:33 +0000
Subject: [PATCH 3/8] typing

---
 pandas/core/dtypes/missing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
index a82305ad20f7b..64a634846549f 100644
--- a/pandas/core/dtypes/missing.py
+++ b/pandas/core/dtypes/missing.py
@@ -624,7 +624,7 @@ def array_equals(left: ArrayLike, right: ArrayLike) -> bool:
         return array_equivalent(left, right, dtype_equal=True)
 
 
-def infer_fill_value(val, index: Index = None):
+def infer_fill_value(val, index: Index):
     """
     infer the fill value for the nan/NaT from the provided
     scalar/ndarray/list-like if we are a NaT, return the correct dtyped

From 61cdef1bd42da74c75910b95e01be6fce87e8522 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Wed, 6 Dec 2023 09:51:58 +0000
Subject: [PATCH 4/8] use pd.array instead

---
 pandas/core/dtypes/missing.py | 8 +++-----
 pandas/core/indexing.py       | 2 +-
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
index c67a24da6f50c..3cf38d5692e0e 100644
--- a/pandas/core/dtypes/missing.py
+++ b/pandas/core/dtypes/missing.py
@@ -624,7 +624,7 @@ def array_equals(left: ArrayLike, right: ArrayLike) -> bool:
         return array_equivalent(left, right, dtype_equal=True)
 
 
-def infer_fill_value(val, index: Index):
+def infer_fill_value(val, length: int):
     """
     infer the fill value for the nan/NaT from the provided
     scalar/ndarray/list-like if we are a NaT, return the correct dtyped
@@ -644,11 +644,9 @@ def infer_fill_value(val, index: Index):
         return np.array(np.nan, dtype=object)
     elif val.dtype.kind == "U":
         if get_option("future.infer_string"):
-            from pandas import Series
+            from pandas.core.construction import array as pd_array
 
-            return Series(
-                [np.nan] * len(index), dtype="string[pyarrow_numpy]", index=index
-            )
+            return pd_array([np.nan] * length, dtype="string[pyarrow_numpy]")
         return None
     return np.nan
 
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 78cece7522406..3278f960227e2 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1880,7 +1880,7 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc"):
                         else:
                             # FIXME: GH#42099#issuecomment-864326014
                             self.obj[key] = infer_fill_value(
-                                value, index=self.obj.index
+                                value, length=len(self.obj)
                             )
 
                         new_indexer = convert_from_missing_indexer_tuple(

From 9e7055af16bc1d8e449ebd6824e01687424a9139 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Wed, 6 Dec 2023 18:27:55 +0000
Subject: [PATCH 5/8] move in whatsnew section, parametrise

---
 doc/source/whatsnew/v2.1.4.rst               |  2 +-
 pandas/tests/frame/indexing/test_indexing.py | 26 +++++++++-----------
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.4.rst b/doc/source/whatsnew/v2.1.4.rst
index 40ab5c324787b..59c0b2d2c5173 100644
--- a/doc/source/whatsnew/v2.1.4.rst
+++ b/doc/source/whatsnew/v2.1.4.rst
@@ -13,7 +13,6 @@ including other versions of pandas.
 
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
-- Fixed bug when creating new column with missing values when setting a single string value (:issue:`56204`)
 - Fixed regression when trying to read a pickled pandas :class:`DataFrame` from pandas 1.3 (:issue:`55137`)
 -
 
@@ -33,6 +32,7 @@ Bug fixes
 - Fixed bug in :meth:`Series.mode` not keeping object dtype when ``infer_string`` is set (:issue:`56183`)
 - Fixed bug in :meth:`Series.str.split` and :meth:`Series.str.rsplit` when ``pat=None`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`56271`)
 - Fixed bug in :meth:`Series.str.translate` losing object dtype when string option is set (:issue:`56152`)
+- Fixed bug when creating new column with missing values when setting a single string value (:issue:`56204`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_214.other:
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index a296c75784dc0..2e437604b7eb0 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -1922,24 +1922,22 @@ def test_adding_new_conditional_column() -> None:
     tm.assert_frame_equal(df, expected)
 
 
-def test_adding_new_conditional_column_with_string() -> None:
-    # https://github.com/pandas-dev/pandas/issues/56204
-    df = DataFrame({"a": [1, 2], "b": [3, 4]})
-    df.loc[lambda x: x.a == 1, "c"] = "1"
-    expected = DataFrame({"a": [1, 2], "b": [3, 4], "c": ["1", None]}).astype(
-        {"a": "int64", "b": "int64", "c": "object"}
-    )
-    tm.assert_frame_equal(df, expected)
-
-
-def test_adding_new_conditional_column_with_infer_string() -> None:
+@pytest.mark.parametrize(
+    ("dtype", "infer_string"),
+    [
+        (object, False),
+        ("string[pyarrow_numpy]", True),
+    ],
+)
+def test_adding_new_conditional_column_with_string(dtype, infer_string) -> None:
     # https://github.com/pandas-dev/pandas/issues/56204
     pytest.importorskip("pyarrow")
+
     df = DataFrame({"a": [1, 2], "b": [3, 4]})
-    with pd.option_context("future.infer_string", True):
-        df.loc[lambda x: x.a == 1, "c"] = "1"
+    with pd.option_context("future.infer_string", infer_string):
+        df.loc[df["a"] == 1, "c"] = "1"
     expected = DataFrame({"a": [1, 2], "b": [3, 4], "c": ["1", None]}).astype(
-        {"a": "int64", "b": "int64", "c": "string[pyarrow_numpy]"}
+        {"a": "int64", "b": "int64", "c": dtype}
     )
     tm.assert_frame_equal(df, expected)
 

From 20898db5f8d7e32b3ba1420de2e3b82ed69326d0 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Thu, 7 Dec 2023 10:53:27 +0000
Subject: [PATCH 6/8] find empty_value dtype by constructing array and doing
 take

---
 pandas/core/indexing.py                       | 13 +++++++++----
 pandas/tests/frame/indexing/test_indexing.py  |  2 +-
 pandas/tests/frame/indexing/test_set_value.py |  5 +----
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 3278f960227e2..12590b6c5f9e2 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -68,6 +68,7 @@
 from pandas.core.construction import (
     array as pd_array,
     extract_array,
+    sanitize_array,
 )
 from pandas.core.indexers import (
     check_array_indexer,
@@ -1876,12 +1877,16 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc"):
                                 return
 
                             self.obj[key] = empty_value
-
+                        elif not is_list_like(value):
+                            # Find our empty_value dtype by constructing an array
+                            #  from our value and doing a .take on it
+                            arr = sanitize_array(value, Index(range(1)), copy=False)
+                            taker = -1 * np.ones(len(self.obj), dtype=np.intp)
+                            empty_value = algos.take_nd(arr, taker)
+                            self.obj[key] = empty_value
                         else:
                             # FIXME: GH#42099#issuecomment-864326014
-                            self.obj[key] = infer_fill_value(
-                                value, length=len(self.obj)
-                            )
+                            self.obj[key] = infer_fill_value(value)
 
                         new_indexer = convert_from_missing_indexer_tuple(
                             indexer, self.obj.axes
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index 2e437604b7eb0..5e14728201a6a 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -1936,7 +1936,7 @@ def test_adding_new_conditional_column_with_string(dtype, infer_string) -> None:
     df = DataFrame({"a": [1, 2], "b": [3, 4]})
     with pd.option_context("future.infer_string", infer_string):
         df.loc[df["a"] == 1, "c"] = "1"
-    expected = DataFrame({"a": [1, 2], "b": [3, 4], "c": ["1", None]}).astype(
+    expected = DataFrame({"a": [1, 2], "b": [3, 4], "c": ["1", float("nan")]}).astype(
         {"a": "int64", "b": "int64", "c": dtype}
     )
     tm.assert_frame_equal(df, expected)
diff --git a/pandas/tests/frame/indexing/test_set_value.py b/pandas/tests/frame/indexing/test_set_value.py
index 32312868adacb..eec953ebb8fe6 100644
--- a/pandas/tests/frame/indexing/test_set_value.py
+++ b/pandas/tests/frame/indexing/test_set_value.py
@@ -30,10 +30,7 @@ def test_set_value_resize(self, float_frame):
         assert res["baz"].dtype == np.object_
 
         res = float_frame.copy()
-        with tm.assert_produces_warning(
-            FutureWarning, match="Setting an item of incompatible dtype"
-        ):
-            res._set_value("foobar", "baz", True)
+        res._set_value("foobar", "baz", True)
         assert res["baz"].dtype == np.object_
 
         res = float_frame.copy()

From 8c30505b2299d8bd3dc228fef9e1fc5161c4e35a Mon Sep 17 00:00:00 2001
From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Thu, 7 Dec 2023 10:54:22 +0000
Subject: [PATCH 7/8] revert

---
 pandas/core/dtypes/missing.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
index 3cf38d5692e0e..4dc0d477f89e8 100644
--- a/pandas/core/dtypes/missing.py
+++ b/pandas/core/dtypes/missing.py
@@ -624,7 +624,7 @@ def array_equals(left: ArrayLike, right: ArrayLike) -> bool:
         return array_equivalent(left, right, dtype_equal=True)
 
 
-def infer_fill_value(val, length: int):
+def infer_fill_value(val):
     """
     infer the fill value for the nan/NaT from the provided
     scalar/ndarray/list-like if we are a NaT, return the correct dtyped
@@ -643,11 +643,7 @@ def infer_fill_value(val, length: int):
             return np.array("NaT", dtype=TD64NS_DTYPE)
         return np.array(np.nan, dtype=object)
     elif val.dtype.kind == "U":
-        if get_option("future.infer_string"):
-            from pandas.core.construction import array as pd_array
-
-            return pd_array([np.nan] * length, dtype="string[pyarrow_numpy]")
-        return None
+        return np.array(np.nan, dtype=val.dtype)
     return np.nan
 
 

From 40093c2286cc8f92eed3571796e7f3737a31be7b Mon Sep 17 00:00:00 2001
From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Wed, 20 Dec 2023 19:14:34 +0000
Subject: [PATCH 8/8] move whatsnew note to 2.2.0

---
 doc/source/whatsnew/v2.1.4.rst | 1 -
 doc/source/whatsnew/v2.2.0.rst | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.1.4.rst b/doc/source/whatsnew/v2.1.4.rst
index 5bd14bf93a2cf..57b83a294963b 100644
--- a/doc/source/whatsnew/v2.1.4.rst
+++ b/doc/source/whatsnew/v2.1.4.rst
@@ -35,7 +35,6 @@ Bug fixes
 - Fixed bug in :meth:`Series.reset_index` not preserving object dtype when ``infer_string`` is set (:issue:`56160`)
 - Fixed bug in :meth:`Series.str.split` and :meth:`Series.str.rsplit` when ``pat=None`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`56271`)
 - Fixed bug in :meth:`Series.str.translate` losing object dtype when string option is set (:issue:`56152`)
-- Fixed bug when creating new column with missing values when setting a single string value (:issue:`56204`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_214.contributors:
diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index 8c475791df64d..80d86805ded49 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -618,6 +618,7 @@ Indexing
 - Bug in :meth:`DataFrame.loc` when setting :class:`Series` with extension dtype into NumPy dtype (:issue:`55604`)
 - Bug in :meth:`Index.difference` not returning a unique set of values when ``other`` is empty or ``other`` is considered non-comparable (:issue:`55113`)
 - Bug in setting :class:`Categorical` values into a :class:`DataFrame` with numpy dtypes raising ``RecursionError`` (:issue:`52927`)
+- Bug in :meth:`DataFrame.loc` when creating a new column with missing values by setting a single string value (:issue:`56204`)
 
 Missing
 ^^^^^^^