Skip to content

Commit 00bed88

Browse files
committed
Fixed bug when creating new column with missing values when setting a single string value
1 parent 910ed9b commit 00bed88

File tree

4 files changed

+34
-5
lines changed

4 files changed

+34
-5
lines changed

doc/source/whatsnew/v2.1.4.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ including other versions of pandas.
1313

1414
Fixed regressions
1515
~~~~~~~~~~~~~~~~~
16-
-
16+
- Fixed bug when creating a new column with missing values by setting a single string value on a subset of rows (:issue:`56204`)
1717
-
1818

1919
.. ---------------------------------------------------------------------------

pandas/core/dtypes/missing.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -624,7 +624,7 @@ def array_equals(left: ArrayLike, right: ArrayLike) -> bool:
624624
return array_equivalent(left, right, dtype_equal=True)
625625

626626

627-
def infer_fill_value(val):
627+
def infer_fill_value(val, index: Index = None):
628628
"""
629629
infer the fill value for the nan/NaT from the provided
630630
scalar/ndarray/list-like if we are a NaT, return the correct dtyped
@@ -643,8 +643,13 @@ def infer_fill_value(val):
643643
return np.array("NaT", dtype=TD64NS_DTYPE)
644644
return np.array(np.nan, dtype=object)
645645
elif val.dtype.kind == "U":
646-
return np.array(np.nan, dtype=object)
647-
# return np.array(np.nan, dtype=val.dtype)
646+
if get_option("future.infer_string"):
647+
from pandas import Series
648+
649+
return Series(
650+
[np.nan] * len(index), dtype="string[pyarrow_numpy]", index=index
651+
)
652+
return None
648653
return np.nan
649654

650655

pandas/core/indexing.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1879,7 +1879,9 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc"):
18791879

18801880
else:
18811881
# FIXME: GH#42099#issuecomment-864326014
1882-
self.obj[key] = infer_fill_value(value)
1882+
self.obj[key] = infer_fill_value(
1883+
value, index=self.obj.index
1884+
)
18831885

18841886
new_indexer = convert_from_missing_indexer_tuple(
18851887
indexer, self.obj.axes

pandas/tests/frame/indexing/test_indexing.py

+22
Original file line numberDiff line numberDiff line change
@@ -1922,6 +1922,28 @@ def test_adding_new_conditional_column() -> None:
19221922
tm.assert_frame_equal(df, expected)
19231923

19241924

1925+
def test_adding_new_column_with_string() -> None:
1926+
# https://github.com/pandas-dev/pandas/issues/56204
1927+
df = DataFrame({"a": [1, 2], "b": [3, 4]})
1928+
df.loc[lambda x: x.a == 1, "c"] = "1"
1929+
expected = DataFrame({"a": [1, 2], "b": [3, 4], "c": ["1", None]}).astype(
1930+
{"a": "int64", "b": "int64", "c": "object"}
1931+
)
1932+
tm.assert_frame_equal(df, expected)
1933+
1934+
1935+
def test_adding_new_column_with_infer_string() -> None:
1936+
# https://github.com/pandas-dev/pandas/issues/56204
1937+
pytest.importorskip("pyarrow")
1938+
df = DataFrame({"a": [1, 2], "b": [3, 4]})
1939+
with pd.option_context("future.infer_string", True):
1940+
df.loc[lambda x: x.a == 1, "c"] = "1"
1941+
expected = DataFrame({"a": [1, 2], "b": [3, 4], "c": ["1", None]}).astype(
1942+
{"a": "int64", "b": "int64", "c": "string[pyarrow_numpy]"}
1943+
)
1944+
tm.assert_frame_equal(df, expected)
1945+
1946+
19251947
def test_add_new_column_infer_string():
19261948
# GH#55366
19271949
pytest.importorskip("pyarrow")

0 commit comments

Comments
 (0)