diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 799bc88ffff4e..87b9683882748 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -339,6 +339,7 @@ Indexing - Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` with empty :class:`DataFrame` and specified columns for string indexer and non empty :class:`DataFrame` to set (:issue:`38831`) - Bug in :meth:`DataFrame.loc.__setitem__` raising ValueError when expanding unique column for :class:`DataFrame` with duplicate columns (:issue:`38521`) - Bug in :meth:`DataFrame.iloc.__setitem__` and :meth:`DataFrame.loc.__setitem__` with mixed dtypes when setting with a dictionary value (:issue:`38335`) +- Bug in :meth:`DataFrame.loc` not preserving dtype of new values when complete columns were assigned (:issue:`20635`, :issue:`20511`, :issue:`27583`) - Bug in :meth:`DataFrame.__setitem__` not raising ``ValueError`` when right hand side is a :class:`DataFrame` with wrong number of columns (:issue:`38604`) - Bug in :meth:`Series.__setitem__` raising ``ValueError`` when setting a :class:`Series` with a scalar indexer (:issue:`38303`) - Bug in :meth:`DataFrame.loc` dropping levels of :class:`MultiIndex` when :class:`DataFrame` used as input has only one row (:issue:`10521`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index cc7c5f666feda..6697f6251669d 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -13,9 +13,11 @@ from pandas.errors import AbstractMethodError, InvalidIndexError from pandas.util._decorators import doc +from pandas.core.dtypes.cast import infer_dtype_from_scalar from pandas.core.dtypes.common import ( is_array_like, is_bool_dtype, + is_dtype_equal, is_hashable, is_integer, is_iterator, @@ -1559,6 +1561,26 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"): val = list(value.values()) if isinstance(value, dict) else value blk = self.obj._mgr.blocks[0] take_split_path = not 
blk._can_hold_element(val) + if not take_split_path: + if ( + isinstance(indexer, tuple) + and is_integer(indexer[0]) + and is_integer(indexer[1]) + and not is_scalar(value) + ): + # GH#37749 this is for listlikes to be treated as scalars, can + # not take split path here + pass + elif is_scalar(value): + dtype, _ = infer_dtype_from_scalar(value) + take_split_path = not is_dtype_equal(dtype, blk.dtype) + elif isinstance(value, ABCSeries): + take_split_path = not (is_dtype_equal(value.dtype, blk.dtype)) + elif isinstance(value, ABCDataFrame): + dtypes = list(value.dtypes.unique()) + take_split_path = not ( + len(dtypes) == 1 and is_dtype_equal(dtypes[0], blk.dtype) + ) # if we have any multi-indexes that have non-trivial slices # (not null slices) then we must take the split path, xref diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 6808ffe65e561..4e61323065a16 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1492,7 +1492,7 @@ def test_at_time_between_time_datetimeindex(self): result.loc[akey] = 0 result = result.loc[akey] expected = df.loc[akey].copy() - expected.loc[:] = 0 + expected.loc[:] = 0.0 tm.assert_frame_equal(result, expected) result = df.copy() @@ -1504,7 +1504,7 @@ def test_at_time_between_time_datetimeindex(self): result.loc[bkey] = 0 result = result.loc[bkey] expected = df.loc[bkey].copy() - expected.loc[:] = 0 + expected.loc[:] = 0.0 tm.assert_frame_equal(result, expected) result = df.copy() diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 9318764a1b5ad..d95f0990d91e1 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -395,6 +395,18 @@ def test_setitem_listlike_indexer_duplicate_columns_not_equal_length(self): with pytest.raises(ValueError, match=msg): df[["a", "b"]] = rhs + def 
test_setitem_scalar_dtype_change(self): + # GH#27583 + df = DataFrame({"a": [0.0], "b": [0.0]}) + df[["a", "b"]] = 0 + expected = DataFrame({"a": [0], "b": [0]}) + tm.assert_frame_equal(df, expected) + + df = DataFrame({"a": [0.0], "b": [0.0]}) + df["b"] = 0 + expected = DataFrame({"a": [0.0], "b": [0]}) + tm.assert_frame_equal(df, expected) + class TestDataFrameSetItemWithExpansion: def test_setitem_listlike_views(self): diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index c203d986efd23..c2b296ddce63b 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -127,7 +127,7 @@ def test_partial_set(self, multiindex_year_month_day_dataframe_random_data): exp["A"].loc[2000, 4].values[:] = 1 tm.assert_frame_equal(df, exp) - df.loc[2000] = 5 + df.loc[2000] = 5.0 exp.loc[2000].values[:] = 5 tm.assert_frame_equal(df, exp) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 1668123e782ff..937576a50f2fc 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -14,10 +14,12 @@ Index, NaT, Series, + Timestamp, array as pd_array, concat, date_range, isna, + to_datetime, ) import pandas._testing as tm from pandas.api.types import is_scalar @@ -987,6 +989,32 @@ def test_iloc_setitem_dictionary_value(self): expected = DataFrame({"x": [1, 9], "y": [2.0, 99.0]}) tm.assert_frame_equal(df, expected) + def test_iloc_setitem_conversion_to_datetime(self): + # GH#20511 + df = DataFrame( + [["2015-01-01", "2016-01-01"], ["2016-01-01", "2015-01-01"]], + columns=["date0", "date1"], + ) + df.iloc[:, [0]] = df.iloc[:, [0]].apply( + lambda x: to_datetime(x, errors="coerce") + ) + expected = DataFrame( + { + "date0": [Timestamp("2015-01-01"), Timestamp("2016-01-01")], + "date1": ["2016-01-01", "2015-01-01"], + } + ) + tm.assert_frame_equal(df, expected) + + def 
test_iloc_conversion_to_float_32_for_columns_list(self): + # GH#33198 + arr = np.random.randn(10 ** 2).reshape(5, 20).astype(np.float64) + df = DataFrame(arr) + df.iloc[:, 11:] = df.iloc[:, 11:].astype(np.float32) + result = df.dtypes.value_counts() + expected = Series([11, 9], index=[np.dtype("float64"), np.dtype("float32")]) + tm.assert_series_equal(result, expected) + class TestILocErrors: # NB: this test should work for _any_ Series we can pass as diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 1cd352e4e0899..a68abfd0588c5 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1149,6 +1149,23 @@ def test_loc_setitem_listlike_with_timedelta64index(self, indexer, expected): tm.assert_frame_equal(expected, df) + def test_loc_setitem_null_slice_single_column_series_value_different_dtype(self): + # GH#20635 + df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"]}) + df.loc[:, "C"] = df["C"].astype("int64") + expected = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": [3, 4]}) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("dtype", ["int64", "Int64"]) + def test_loc_setitem_null_slice_different_dtypes(self, dtype): + # GH#20635 + df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"], "D": [1, 2]}) + rhs = df[["B", "C"]].astype("int64").astype(dtype) + df.loc[:, ["B", "C"]] = rhs + expected = DataFrame({"A": ["a", "b"], "B": [1, 2], "C": [3, 4], "D": [1, 2]}) + expected[["B", "C"]] = expected[["B", "C"]].astype(dtype) + tm.assert_frame_equal(df, expected) + class TestLocWithMultiIndex: @pytest.mark.parametrize( @@ -2117,6 +2134,14 @@ def test_loc_setitem_dt64tz_values(self): result = s2["a"] assert result == expected + @pytest.mark.parametrize("dtype", ["int64", "Int64"]) + def test_loc_setitem_series_null_slice_different_dtypes(self, dtype): + # GH#20635 + ser = Series(["3", "4"], name="A") + ser.loc[:] = ser.astype("int64").astype(dtype) + 
expected = Series([3, 4], name="A", dtype=dtype) + tm.assert_series_equal(ser, expected) + @pytest.mark.parametrize("array_fn", [np.array, pd.array, list, tuple]) @pytest.mark.parametrize("size", [0, 4, 5, 6]) def test_loc_iloc_setitem_with_listlike(self, size, array_fn):