diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 08edc7531bcd6..e875153626b5a 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -622,6 +622,7 @@ Indexing - Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` raises when the index was of ``object`` dtype and the given numeric label was in the index (:issue:`26491`) - Bug in :meth:`DataFrame.loc` returned requested key plus missing values when ``loc`` was applied to single level from a :class:`MultiIndex` (:issue:`27104`) - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using a listlike indexer containing NA values (:issue:`37722`) +- Bug in :meth:`DataFrame.loc.__setitem__` expanding an empty :class:`DataFrame` with mixed dtypes (:issue:`37932`) - Bug in :meth:`DataFrame.xs` ignored ``droplevel=False`` for columns (:issue:`19056`) - Bug in :meth:`DataFrame.reindex` raising ``IndexingError`` wrongly for empty DataFrame with ``tolerance`` not None or ``method="nearest"`` (:issue:`27315`) - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using listlike indexer that contains elements that are in the index's ``categories`` but not in the index itself failing to raise ``KeyError`` (:issue:`37901`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index bdd79e37ba386..f66fd19f30d84 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1687,6 +1687,14 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str): for loc, v in zip(ilocs, value): self._setitem_single_column(loc, v, pi) + elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0: + # This is a setitem-with-expansion, see + # test_loc_setitem_empty_append_expands_rows_mixed_dtype + # e.g. df = DataFrame(columns=["x", "y"]) + # df["x"] = df["x"].astype(np.int64) + # df.loc[:, "x"] = [1, 2, 3] + self._setitem_single_column(ilocs[0], value, pi) + else: raise ValueError( "Must have equal len keys and value " diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index f8ad16be607a5..cf6c2878acd9a 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -952,7 +952,7 @@ def test_loc_uint64(self): result = s.loc[[np.iinfo("uint64").max - 1, np.iinfo("uint64").max]] tm.assert_series_equal(result, s) - def test_loc_setitem_empty_append(self): + def test_loc_setitem_empty_append_expands_rows(self): # GH6173, various appends to an empty dataframe data = [1, 2, 3] @@ -963,6 +963,18 @@ def test_loc_setitem_empty_append(self): df.loc[:, "x"] = data tm.assert_frame_equal(df, expected) + def test_loc_setitem_empty_append_expands_rows_mixed_dtype(self): + # GH#37932 same as test_loc_setitem_empty_append_expands_rows + # but with mixed dtype so we go through take_split_path + data = [1, 2, 3] + expected = DataFrame({"x": data, "y": [None] * len(data)}) + + df = DataFrame(columns=["x", "y"]) + df["x"] = df["x"].astype(np.int64) + df.loc[:, "x"] = data + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_empty_append_single_value(self): # only appends one value expected = DataFrame({"x": [1.0], "y": [np.nan]}) df = DataFrame(columns=["x", "y"], dtype=float)