pandas-dev · phofl · Nov 10, 2020 · Nov 10, 2020 · Nov 10, 2020 · Nov 11, 2020
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -612,6 +612,7 @@ Indexing
 - Bug in :meth:`DataFrame.reindex` raising ``IndexingError`` wrongly for empty :class:`DataFrame` with ``tolerance`` not None or ``method="nearest"`` (:issue:`27315`)
 - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using listlike indexer that contains elements that are in the index's ``categories`` but not in the index itself failing to raise ``KeyError`` (:issue:`37901`)
 - Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligning objects in ``__setitem__`` (:issue:`22046`)
+- Bug in :meth:`DataFrame.loc` not preserving dtype of new values when complete columns were assigned (:issue:`20635`, :issue:`20511`, :issue:`27583`)
 
 Missing
 ^^^^^^^

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -11,8 +11,10 @@
 from pandas.errors import AbstractMethodError, InvalidIndexError
 from pandas.util._decorators import doc
 
+from pandas.core.dtypes.cast import infer_dtype_from_scalar
 from pandas.core.dtypes.common import (
     is_array_like,
+    is_dtype_equal,
     is_hashable,
     is_integer,
     is_iterator,
@@ -1542,6 +1544,17 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"):
                 val = list(value.values()) if isinstance(value, dict) else value
                 blk = self.obj._mgr.blocks[0]
                 take_split_path = not blk._can_hold_element(val)
+                if not take_split_path:
+                    if is_scalar(value):
+                        dtype, _ = infer_dtype_from_scalar(value)
+                        take_split_path = not is_dtype_equal(dtype, blk.dtype)
+                    elif isinstance(value, ABCSeries):
+                        take_split_path = not (is_dtype_equal(value.dtype, blk.dtype))
+                    elif isinstance(value, ABCDataFrame):
+                        dtypes = list(value.dtypes.unique())
+                        take_split_path = not (
+                            len(dtypes) == 1 and is_dtype_equal(dtypes[0], blk.dtype)
+                        )
 
         # if we have any multi-indexes that have non-trivial slices
         # (not null slices) then we must take the split path, xref

diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
@@ -1471,7 +1471,7 @@ def test_at_time_between_time_datetimeindex(self):
         result.loc[akey] = 0
         result = result.loc[akey]
         expected = df.loc[akey].copy()
-        expected.loc[:] = 0
+        expected.loc[:] = 0.0
         tm.assert_frame_equal(result, expected)
 
         result = df.copy()
@@ -1483,7 +1483,7 @@ def test_at_time_between_time_datetimeindex(self):
         result.loc[bkey] = 0
         result = result.loc[bkey]
         expected = df.loc[bkey].copy()
-        expected.loc[:] = 0
+        expected.loc[:] = 0.0
         tm.assert_frame_equal(result, expected)
 
         result = df.copy()

diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
@@ -289,6 +289,27 @@ def test_setitem_periodindex(self):
         assert isinstance(rs.index, PeriodIndex)
         tm.assert_index_equal(rs.index, rng)
 
+    @pytest.mark.parametrize("klass", [list, np.array])
+    def test_iloc_setitem_bool_indexer(self, klass):
+        # GH#36741: iloc setitem with a boolean list/ndarray row indexer
+        df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]})
+        indexer = klass([True, False, False])
+        df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2
+        expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]})
+        tm.assert_frame_equal(df, expected)
+
+    def test_setitem_scalar_dtype_change(self):
+        # GH#27583: an int scalar set on whole float columns gives int columns
+        df = DataFrame({"a": [0.0], "b": [0.0]})
+        df[["a", "b"]] = 0
+        expected = DataFrame({"a": [0], "b": [0]})
+        tm.assert_frame_equal(df, expected)
+
+        df = DataFrame({"a": [0.0], "b": [0.0]})
+        df["b"] = 0
+        expected = DataFrame({"a": [0.0], "b": [0]})
+        tm.assert_frame_equal(df, expected)
+
 
 class TestDataFrameSetItemSlicing:
     def test_setitem_slice_position(self):

diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py
@@ -120,7 +120,7 @@ def test_partial_set(self, multiindex_year_month_day_dataframe_random_data):
         exp["A"].loc[2000, 4].values[:] = 1
         tm.assert_frame_equal(df, exp)
 
-        df.loc[2000] = 5
+        df.loc[2000] = 5.0
         exp.loc[2000].values[:] = 5
         tm.assert_frame_equal(df, exp)
 

diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
@@ -17,6 +17,7 @@
     concat,
     date_range,
     isna,
+    to_datetime,
 )
 import pandas._testing as tm
 from pandas.api.types import is_scalar
@@ -834,6 +835,32 @@ def test_iloc_setitem_dictionary_value(self):
         expected = DataFrame({"x": [1, 9], "y": [2, 99]})
         tm.assert_frame_equal(df, expected)
 
+    def test_iloc_setitem_conversion_to_datetime(self):
+        # GH#20511: a whole column set via iloc takes the datetime values
+        df = DataFrame(
+            [["2015-01-01", "2016-01-01"], ["2016-01-01", "2015-01-01"]],
+            columns=["date0", "date1"],
+        )
+        df.iloc[:, [0]] = df.iloc[:, [0]].apply(
+            lambda x: to_datetime(x, errors="coerce")
+        )
+        expected = DataFrame(
+            {
+                "date0": [to_datetime("2015-01-01"), to_datetime("2016-01-01")],
+                "date1": ["2016-01-01", "2015-01-01"],
+            }
+        )
+        tm.assert_frame_equal(df, expected)
+
+    def test_iloc_conversion_to_float_32_for_columns_list(self):
+        # GH#33198: float32 values set on columns 10: via iloc stay float32
+        arr = np.random.randn(10 ** 2).reshape(5, 20).astype(np.float64)
+        df = DataFrame(arr)
+        df.iloc[:, 10:] = df.iloc[:, 10:].astype(np.float32)
+        result = df.dtypes.value_counts()
+        expected = Series([10, 10], index=[np.dtype("float32"), np.dtype("float64")])
+        tm.assert_series_equal(result, expected)
+
 
 class TestILocErrors:
     # NB: this test should work for _any_ Series we can pass as

diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -1127,6 +1127,23 @@ def test_loc_setitem_listlike_with_timedelta64index(self, indexer, expected):
 
         tm.assert_frame_equal(expected, df)
 
+    def test_loc_setitem_null_slice_single_column_series_value_different_dtype(self):
+        # GH#20635: int Series set on a full str column via loc stays int
+        df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"]})
+        df.loc[:, "C"] = df["C"].astype("int64")
+        expected = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": [3, 4]})
+        tm.assert_frame_equal(df, expected)
+
+    @pytest.mark.parametrize("dtype", ["int64", "Int64"])
+    def test_loc_setitem_null_slice_different_dtypes(self, dtype):
+        # GH#20635: frame set on full columns via loc keeps the new dtype
+        df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"], "D": [1, 2]})
+        rhs = df[["B", "C"]].astype("int64").astype(dtype)
+        df.loc[:, ["B", "C"]] = rhs
+        expected = DataFrame({"A": ["a", "b"], "B": [1, 2], "C": [3, 4], "D": [1, 2]})
+        expected[["B", "C"]] = expected[["B", "C"]].astype(dtype)
+        tm.assert_frame_equal(df, expected)
+
 
 class TestLocWithMultiIndex:
     @pytest.mark.parametrize(
@@ -2000,6 +2017,14 @@ def test_loc_setitem_dt64tz_values(self):
         result = s2["a"]
         assert result == expected
 
+    @pytest.mark.parametrize("dtype", ["int64", "Int64"])
+    def test_setitem_series_null_slice_different_dtypes(self, dtype):
+        # GH#20635: Series set via loc[:] takes the dtype of the new values
+        ser = Series(["3", "4"], name="A")
+        ser.loc[:] = ser.astype("int64").astype(dtype)
+        expected = Series([3, 4], name="A", dtype=dtype)
+        tm.assert_series_equal(ser, expected)
+
 
 @pytest.mark.parametrize("value", [1, 1.5])
 def test_loc_int_in_object_index(frame_or_series, value):