Skip to content

Commit aa6ab37

Browse files
authored
BUG: wrong FutureWarning on string assignment in certain conditions (#57402)
* whatsnew * factor out construct_1d_array_from_inferred_fill_value * 🏷️ typing
1 parent 7e255a6 commit aa6ab37

File tree

4 files changed

+47
-9
lines changed

4 files changed

+47
-9
lines changed

doc/source/whatsnew/v2.2.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Fixed regressions
2424
- Fixed regression in :meth:`CategoricalIndex.difference` raising ``KeyError`` when other contains null values other than NaN (:issue:`57318`)
2525
- Fixed regression in :meth:`DataFrame.groupby` raising ``ValueError`` when grouping by a :class:`Series` in some cases (:issue:`57276`)
2626
- Fixed regression in :meth:`DataFrame.loc` raising ``IndexError`` for non-unique, masked dtype indexes where result has more than 10,000 rows (:issue:`57027`)
27+
- Fixed regression in :meth:`DataFrame.loc` which was unnecessarily emitting an "incompatible dtype" ``FutureWarning`` when expanding with a partial row indexer and multiple columns (see `PDEP6 <https://pandas.pydata.org/pdeps/0006-ban-upcasting.html>`_) (:issue:`56503`)
2728
- Fixed regression in :meth:`DataFrame.map` with ``na_action="ignore"`` not being respected for NumPy nullable and :class:`ArrowDtypes` (:issue:`57316`)
2829
- Fixed regression in :meth:`DataFrame.merge` raising ``ValueError`` for certain types of 3rd-party extension arrays (:issue:`57316`)
2930
- Fixed regression in :meth:`DataFrame.shift` raising ``AssertionError`` for ``axis=1`` and empty :class:`DataFrame` (:issue:`57301`)

pandas/core/dtypes/missing.py

+14
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,20 @@ def infer_fill_value(val):
646646
return np.nan
647647

648648

649+
def construct_1d_array_from_inferred_fill_value(
    value: object, length: int
) -> ArrayLike:
    """
    Build a 1-D array of ``length`` missing entries, dtype inferred from ``value``.

    Parameters
    ----------
    value : object
        Value used to infer the dtype; it is wrapped in a length-1 array via
        ``sanitize_array``.
    length : int
        Number of elements in the returned array.

    Returns
    -------
    ArrayLike
        The result of taking position ``-1`` from the length-1 array
        ``length`` times with ``take_nd``.
    """
    # Imported inside the function rather than at module level
    # (presumably to avoid circular imports -- TODO confirm).
    from pandas.core.algorithms import take_nd
    from pandas.core.construction import sanitize_array
    from pandas.core.indexes.base import Index

    # Find our empty_value dtype by constructing an array
    # from our value and doing a .take on it
    arr = sanitize_array(value, Index(range(1)), copy=False)
    # An indexer made only of -1 marks every position as missing for take_nd,
    # so the result holds fill values in a dtype derived from ``arr``.
    # np.full builds the indexer directly instead of scaling an array of ones.
    taker = np.full(length, -1, dtype=np.intp)
    return take_nd(arr, taker)
661+
662+
649663
def maybe_fill(arr: np.ndarray) -> np.ndarray:
650664
"""
651665
Fill numpy.ndarray with NaN, unless we have an integer or boolean dtype.

pandas/core/indexing.py

+16-9
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
ABCSeries,
5151
)
5252
from pandas.core.dtypes.missing import (
53+
construct_1d_array_from_inferred_fill_value,
5354
infer_fill_value,
5455
is_valid_na_for_dtype,
5556
isna,
@@ -61,7 +62,6 @@
6162
from pandas.core.construction import (
6263
array as pd_array,
6364
extract_array,
64-
sanitize_array,
6565
)
6666
from pandas.core.indexers import (
6767
check_array_indexer,
@@ -854,7 +854,6 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:
854854
if self.ndim != 2:
855855
return
856856

857-
orig_key = key
858857
if isinstance(key, tuple) and len(key) > 1:
859858
# key may be a tuple if we are .loc
860859
# if length of key is > 1 set key to column part
@@ -872,7 +871,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:
872871
keys = self.obj.columns.union(key, sort=False)
873872
diff = Index(key).difference(self.obj.columns, sort=False)
874873

875-
if len(diff) and com.is_null_slice(orig_key[0]):
874+
if len(diff):
876875
# e.g. if we are doing df.loc[:, ["A", "B"]] = 7 and "B"
877876
# is a new column, add the new columns with dtype=np.void
878877
# so that later when we go through setitem_single_column
@@ -1878,12 +1877,9 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc") -> None:
18781877

18791878
self.obj[key] = empty_value
18801879
elif not is_list_like(value):
1881-
# Find our empty_value dtype by constructing an array
1882-
# from our value and doing a .take on it
1883-
arr = sanitize_array(value, Index(range(1)), copy=False)
1884-
taker = -1 * np.ones(len(self.obj), dtype=np.intp)
1885-
empty_value = algos.take_nd(arr, taker)
1886-
self.obj[key] = empty_value
1880+
self.obj[key] = construct_1d_array_from_inferred_fill_value(
1881+
value, len(self.obj)
1882+
)
18871883
else:
18881884
# FIXME: GH#42099#issuecomment-864326014
18891885
self.obj[key] = infer_fill_value(value)
@@ -2165,6 +2161,17 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
21652161
else:
21662162
# set value into the column (first attempting to operate inplace, then
21672163
# falling back to casting if necessary)
2164+
dtype = self.obj.dtypes.iloc[loc]
2165+
if dtype == np.void:
2166+
# This means we're expanding, with multiple columns, e.g.
2167+
# df = pd.DataFrame({'A': [1,2,3], 'B': [4,5,6]})
2168+
# df.loc[df.index <= 2, ['F', 'G']] = (1, 'abc')
2169+
# Columns F and G will initially be set to np.void.
2170+
# Here, we replace those temporary `np.void` columns with
2171+
# columns of the appropriate dtype, based on `value`.
2172+
self.obj.iloc[:, loc] = construct_1d_array_from_inferred_fill_value(
2173+
value, len(self.obj)
2174+
)
21682175
self.obj._mgr.column_setitem(loc, plane_indexer, value)
21692176

21702177
def _setitem_single_block(self, indexer, value, name: str) -> None:

pandas/tests/frame/indexing/test_setitem.py

+16
Original file line numberDiff line numberDiff line change
@@ -1369,3 +1369,19 @@ def test_full_setter_loc_incompatible_dtype():
13691369
df.loc[:, "a"] = {0: 3, 1: 4}
13701370
expected = DataFrame({"a": [3, 4]})
13711371
tm.assert_frame_equal(df, expected)
1372+
1373+
1374+
def test_setitem_partial_row_multiple_columns():
    """Expanding via ``.loc`` with a row mask and new columns must not warn."""
    # https://github.com/pandas-dev/pandas/issues/56503
    df = DataFrame({"A": [1, 2, 3], "B": [4.0, 5, 6]})
    # should not warn: enforce it explicitly so the test fails if any
    # warning is emitted while the new columns "F" and "G" are added
    with tm.assert_produces_warning(None):
        df.loc[df.index <= 1, ["F", "G"]] = (1, "abc")
    # Rows not selected by the mask are left as missing values in the
    # new columns.
    expected = DataFrame(
        {
            "A": [1, 2, 3],
            "B": [4.0, 5, 6],
            "F": [1.0, 1, float("nan")],
            "G": ["abc", "abc", float("nan")],
        }
    )
    tm.assert_frame_equal(df, expected)

0 commit comments

Comments
 (0)