Skip to content

Commit 5550bdb

Browse files
Backport PR #57402 on branch 2.2.x (BUG: wrong future Warning on string assignment in certain condition) (#57460)
Backport PR #57402: BUG: wrong future Warning on string assignment in certain condition Co-authored-by: Marco Edward Gorelli <[email protected]>
1 parent 11818ad commit 5550bdb

File tree

4 files changed

+47
-9
lines changed

4 files changed

+47
-9
lines changed

doc/source/whatsnew/v2.2.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Fixed regressions
2424
- Fixed regression in :meth:`CategoricalIndex.difference` raising ``KeyError`` when other contains null values other than NaN (:issue:`57318`)
2525
- Fixed regression in :meth:`DataFrame.groupby` raising ``ValueError`` when grouping by a :class:`Series` in some cases (:issue:`57276`)
2626
- Fixed regression in :meth:`DataFrame.loc` raising ``IndexError`` for non-unique, masked dtype indexes where result has more than 10,000 rows (:issue:`57027`)
27+
- Fixed regression in :meth:`DataFrame.loc` unnecessarily raising an "incompatible dtype" ``FutureWarning`` when expanding with a partial row indexer and multiple columns (see `PDEP6 <https://pandas.pydata.org/pdeps/0006-ban-upcasting.html>`_) (:issue:`56503`)
2728
- Fixed regression in :meth:`DataFrame.map` with ``na_action="ignore"`` not being respected for NumPy nullable and :class:`ArrowDtype` dtypes (:issue:`57316`)
2829
- Fixed regression in :meth:`DataFrame.merge` raising ``ValueError`` for certain types of 3rd-party extension arrays (:issue:`57316`)
2930
- Fixed regression in :meth:`DataFrame.shift` raising ``AssertionError`` for ``axis=1`` and empty :class:`DataFrame` (:issue:`57301`)

pandas/core/dtypes/missing.py

+14
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,20 @@ def infer_fill_value(val):
647647
return np.nan
648648

649649

650+
def construct_1d_array_from_inferred_fill_value(
    value: object, length: int
) -> ArrayLike:
    """
    Build a 1D placeholder array whose dtype is inferred from ``value``.

    A one-element array is constructed from ``value`` and then "taken"
    ``length`` times with the indexer ``-1``, so the dtype of the result is
    determined by ``value`` (and whatever promotion the take performs)
    rather than being hard-coded here.

    Parameters
    ----------
    value : object
        Value used to infer the dtype of the result. Callers visible in
        the indexing code pass the value being assigned into a new column.
    length : int
        Number of elements in the returned array.

    Returns
    -------
    ArrayLike
        The result of ``take_nd`` on the one-element array, with
        ``length`` entries.
    """
    # Imported inside the function, as in the original implementation
    # (presumably to avoid import cycles at module load -- TODO confirm).
    from pandas.core.algorithms import take_nd
    from pandas.core.construction import sanitize_array
    from pandas.core.indexes.base import Index

    # Find our empty_value dtype by constructing an array
    # from our value and doing a .take on it
    arr = sanitize_array(value, Index(range(1)), copy=False)
    # Every position uses the indexer -1; equivalent to the former
    # ``-1 * np.ones(length, dtype=np.intp)`` without the multiplication.
    taker = np.full(length, -1, dtype=np.intp)
    return take_nd(arr, taker)
662+
663+
650664
def maybe_fill(arr: np.ndarray) -> np.ndarray:
651665
"""
652666
Fill numpy.ndarray with NaN, unless we have an integer or boolean dtype.

pandas/core/indexing.py

+16-9
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
ABCSeries,
5858
)
5959
from pandas.core.dtypes.missing import (
60+
construct_1d_array_from_inferred_fill_value,
6061
infer_fill_value,
6162
is_valid_na_for_dtype,
6263
isna,
@@ -68,7 +69,6 @@
6869
from pandas.core.construction import (
6970
array as pd_array,
7071
extract_array,
71-
sanitize_array,
7272
)
7373
from pandas.core.indexers import (
7474
check_array_indexer,
@@ -844,7 +844,6 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:
844844
if self.ndim != 2:
845845
return
846846

847-
orig_key = key
848847
if isinstance(key, tuple) and len(key) > 1:
849848
# key may be a tuple if we are .loc
850849
# if length of key is > 1 set key to column part
@@ -862,7 +861,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:
862861
keys = self.obj.columns.union(key, sort=False)
863862
diff = Index(key).difference(self.obj.columns, sort=False)
864863

865-
if len(diff) and com.is_null_slice(orig_key[0]):
864+
if len(diff):
866865
# e.g. if we are doing df.loc[:, ["A", "B"]] = 7 and "B"
867866
# is a new column, add the new columns with dtype=np.void
868867
# so that later when we go through setitem_single_column
@@ -1878,12 +1877,9 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc"):
18781877

18791878
self.obj[key] = empty_value
18801879
elif not is_list_like(value):
1881-
# Find our empty_value dtype by constructing an array
1882-
# from our value and doing a .take on it
1883-
arr = sanitize_array(value, Index(range(1)), copy=False)
1884-
taker = -1 * np.ones(len(self.obj), dtype=np.intp)
1885-
empty_value = algos.take_nd(arr, taker)
1886-
self.obj[key] = empty_value
1880+
self.obj[key] = construct_1d_array_from_inferred_fill_value(
1881+
value, len(self.obj)
1882+
)
18871883
else:
18881884
# FIXME: GH#42099#issuecomment-864326014
18891885
self.obj[key] = infer_fill_value(value)
@@ -2165,6 +2161,17 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
21652161
else:
21662162
# set value into the column (first attempting to operate inplace, then
21672163
# falling back to casting if necessary)
2164+
dtype = self.obj.dtypes.iloc[loc]
2165+
if dtype == np.void:
2166+
# This means we're expanding, with multiple columns, e.g.
2167+
# df = pd.DataFrame({'A': [1,2,3], 'B': [4,5,6]})
2168+
# df.loc[df.index <= 2, ['F', 'G']] = (1, 'abc')
2169+
# Columns F and G will initially be set to np.void.
2170+
# Here, we replace those temporary `np.void` columns with
2171+
# columns of the appropriate dtype, based on `value`.
2172+
self.obj.iloc[:, loc] = construct_1d_array_from_inferred_fill_value(
2173+
value, len(self.obj)
2174+
)
21682175
self.obj._mgr.column_setitem(loc, plane_indexer, value)
21692176

21702177
self.obj._clear_item_cache()

pandas/tests/frame/indexing/test_setitem.py

+16
Original file line numberDiff line numberDiff line change
@@ -1401,3 +1401,19 @@ def test_full_setter_loc_incompatible_dtype():
14011401
df.loc[:, "a"] = {0: 3, 1: 4}
14021402
expected = DataFrame({"a": [3, 4]})
14031403
tm.assert_frame_equal(df, expected)
1404+
1405+
1406+
def test_setitem_partial_row_multiple_columns():
    """Expanding with a partial row indexer and several new columns."""
    # https://github.com/pandas-dev/pandas/issues/56503
    df = DataFrame({"A": [1, 2, 3], "B": [4.0, 5, 6]})
    # should not warn
    df.loc[df.index <= 1, ["F", "G"]] = (1, "abc")
    # Rows not selected by the boolean mask are expected to hold NaN in the
    # newly created columns "F" and "G".
    expected = DataFrame(
        {
            "A": [1, 2, 3],
            "B": [4.0, 5, 6],
            "F": [1.0, 1, float("nan")],
            "G": ["abc", "abc", float("nan")],
        }
    )
    tm.assert_frame_equal(df, expected)

0 commit comments

Comments
 (0)