Skip to content

Commit 6a65c64

Browse files
authored
BUG: Fixed bug when creating new column with missing values when setting a single string value (pandas-dev#56321)
1 parent 98e1d2f commit 6a65c64

File tree

4 files changed

+30
-5
lines changed

4 files changed

+30
-5
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -618,6 +618,7 @@ Indexing
618618
- Bug in :meth:`DataFrame.loc` when setting :class:`Series` with extension dtype into NumPy dtype (:issue:`55604`)
619619
- Bug in :meth:`Index.difference` not returning a unique set of values when ``other`` is empty or ``other`` is considered non-comparable (:issue:`55113`)
620620
- Bug in setting :class:`Categorical` values into a :class:`DataFrame` with numpy dtypes raising ``RecursionError`` (:issue:`52927`)
621+
- Fixed bug when creating new column with missing values when setting a single string value (:issue:`56204`)
621622

622623
Missing
623624
^^^^^^^

pandas/core/indexing.py

+8 -1
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,7 @@
6868
from pandas.core.construction import (
6969
array as pd_array,
7070
extract_array,
71+
sanitize_array,
7172
)
7273
from pandas.core.indexers import (
7374
check_array_indexer,
@@ -1876,7 +1877,13 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc"):
18761877
return
18771878

18781879
self.obj[key] = empty_value
1879-
1880+
elif not is_list_like(value):
1881+
# Find our empty_value dtype by constructing an array
1882+
# from our value and doing a .take on it
1883+
arr = sanitize_array(value, Index(range(1)), copy=False)
1884+
taker = -1 * np.ones(len(self.obj), dtype=np.intp)
1885+
empty_value = algos.take_nd(arr, taker)
1886+
self.obj[key] = empty_value
18801887
else:
18811888
# FIXME: GH#42099#issuecomment-864326014
18821889
self.obj[key] = infer_fill_value(value)

pandas/tests/frame/indexing/test_indexing.py

+20
Original file line number | Diff line number | Diff line change
@@ -1935,6 +1935,26 @@ def test_adding_new_conditional_column() -> None:
19351935
tm.assert_frame_equal(df, expected)
19361936

19371937

1938+
@pytest.mark.parametrize(
1939+
("dtype", "infer_string"),
1940+
[
1941+
(object, False),
1942+
("string[pyarrow_numpy]", True),
1943+
],
1944+
)
1945+
def test_adding_new_conditional_column_with_string(dtype, infer_string) -> None:
1946+
# https://github.com/pandas-dev/pandas/issues/56204
1947+
pytest.importorskip("pyarrow")
1948+
1949+
df = DataFrame({"a": [1, 2], "b": [3, 4]})
1950+
with pd.option_context("future.infer_string", infer_string):
1951+
df.loc[df["a"] == 1, "c"] = "1"
1952+
expected = DataFrame({"a": [1, 2], "b": [3, 4], "c": ["1", float("nan")]}).astype(
1953+
{"a": "int64", "b": "int64", "c": dtype}
1954+
)
1955+
tm.assert_frame_equal(df, expected)
1956+
1957+
19381958
def test_add_new_column_infer_string():
19391959
# GH#55366
19401960
pytest.importorskip("pyarrow")

pandas/tests/frame/indexing/test_set_value.py

+1 -4
Original file line number | Diff line number | Diff line change
@@ -32,10 +32,7 @@ def test_set_value_resize(self, float_frame, using_infer_string):
3232
else:
3333
assert res["baz"].dtype == np.object_
3434
res = float_frame.copy()
35-
with tm.assert_produces_warning(
36-
FutureWarning, match="Setting an item of incompatible dtype"
37-
):
38-
res._set_value("foobar", "baz", True)
35+
res._set_value("foobar", "baz", True)
3936
assert res["baz"].dtype == np.object_
4037

4138
res = float_frame.copy()

0 commit comments

Comments
 (0)