From 2d2858fccb0ae0f5c6617d1d476de3e63d2f9705 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 13 Jan 2021 17:51:57 -0800
Subject: [PATCH 1/3] API/BUG: always try to operate inplace when setting with
 loc/iloc[foo, bar]

---
 pandas/core/indexers.py                      | 23 ++++++-
 pandas/core/indexing.py                      |  3 +
 pandas/core/internals/blocks.py              | 22 +++---
 pandas/tests/extension/base/setitem.py       |  9 ++-
 pandas/tests/extension/test_numpy.py         | 17 +++++
 pandas/tests/frame/indexing/test_indexing.py |  4 +-
 pandas/tests/indexing/test_indexing.py       |  1 +
 pandas/tests/indexing/test_loc.py            | 71 ++++++++++++--------
 8 files changed, 109 insertions(+), 41 deletions(-)

diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py
index a221f77f26170..79479c6db8d9d 100644
--- a/pandas/core/indexers.py
+++ b/pandas/core/indexers.py
@@ -5,7 +5,7 @@
 
 import numpy as np
 
-from pandas._typing import Any, AnyArrayLike
+from pandas._typing import Any, AnyArrayLike, ArrayLike
 
 from pandas.core.dtypes.common import (
     is_array_like,
@@ -270,6 +270,27 @@ def maybe_convert_indices(indices, n: int):
 # Unsorted
 
 
+def is_exact_shape_match(target: ArrayLike, value: ArrayLike) -> bool:
+    """
+    Is setting this value into this target overwriting the entire column?
+
+    Parameters
+    ----------
+    target : np.ndarray or ExtensionArray
+    value : np.ndarray or ExtensionArray
+
+    Returns
+    -------
+    bool
+    """
+    return (
+        len(value.shape) > 0
+        and len(target.shape) > 0
+        and value.shape[0] == target.shape[0]
+        and value.size == target.size
+    )
+
+
 def length_of_indexer(indexer, target=None) -> int:
     """
     Return the expected length of target[indexer]
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index ac0955c6c3e4c..f33e5e990fab2 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -33,6 +33,7 @@
 from pandas.core.construction import array as pd_array
 from pandas.core.indexers import (
     check_array_indexer,
+    is_exact_shape_match,
     is_list_like_indexer,
     length_of_indexer,
 )
@@ -1815,6 +1816,8 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
         # GH#6149 (null slice), GH#10408 (full bounds)
         if com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj)):
             ser = value
+        elif is_array_like(value) and is_exact_shape_match(value, ser):
+            ser = value
         else:
             # set the item, possibly having a dtype change
             ser = ser.copy()
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 9eb4bdc5dbae3..8f4753c9d0cfc 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -65,6 +65,8 @@
     Categorical,
     DatetimeArray,
     ExtensionArray,
+    FloatingArray,
+    IntegerArray,
     PandasArray,
     PandasDtype,
     TimedeltaArray,
@@ -75,6 +77,7 @@
 from pandas.core.indexers import (
     check_setitem_lengths,
     is_empty_indexer,
+    is_exact_shape_match,
     is_scalar_indexer,
 )
 import pandas.core.missing as missing
@@ -947,11 +950,8 @@ def setitem(self, indexer, value):
 
         # length checking
         check_setitem_lengths(indexer, value, values)
-        exact_match = (
-            len(arr_value.shape)
-            and arr_value.shape[0] == values.shape[0]
-            and arr_value.size == values.size
-        )
+        exact_match = is_exact_shape_match(values, arr_value)
+
         if is_empty_indexer(indexer, arr_value):
             # GH#8669 empty indexers
             pass
@@ -965,12 +965,14 @@ def setitem(self, indexer, value):
             # GH25495 - If the current dtype is not categorical,
             # we need to create a new categorical block
             values[indexer] = value
-            return self.make_block(Categorical(self.values, dtype=arr_value.dtype))
 
         elif exact_match and is_ea_value:
             # GH#32395 if we're going to replace the values entirely, just
             #  substitute in the new array
-            return self.make_block(arr_value)
+            if not self.is_object and isinstance(value, (IntegerArray, FloatingArray)):
+                values[indexer] = value.to_numpy(value.dtype.numpy_dtype)
+            else:
+                values[indexer] = np.asarray(value)
 
         # if we are an exact match (ex-broadcasting),
         # then use the resultant dtype
@@ -978,8 +980,6 @@ def setitem(self, indexer, value):
             # We are setting _all_ of the array's values, so can cast to new dtype
             values[indexer] = value
 
-            values = values.astype(arr_value.dtype, copy=False)
-
         elif is_ea_value:
             # GH#38952
             if values.ndim == 1:
@@ -1907,6 +1907,10 @@ class NumericBlock(Block):
     _can_hold_na = True
 
     def _can_hold_element(self, element: Any) -> bool:
+        element = extract_array(element, extract_numpy=True)
+        if isinstance(element, (IntegerArray, FloatingArray)):
+            if element._mask.any():
+                return False
         return can_hold_element(self.dtype, element)
 
 
diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py
index cb9a19b438feb..e610c1659a268 100644
--- a/pandas/tests/extension/base/setitem.py
+++ b/pandas/tests/extension/base/setitem.py
@@ -341,13 +341,20 @@ def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
 
         key = full_indexer(df)
         result.loc[key, "data"] = df["data"]
+
         self.assert_frame_equal(result, expected)
 
     def test_setitem_series(self, data, full_indexer):
         # https://github.com/pandas-dev/pandas/issues/32395
-        ser = expected = pd.Series(data, name="data")
+        ser = pd.Series(data, name="data")
         result = pd.Series(index=ser.index, dtype=object, name="data")
 
+        # because result has object dtype, the attempt to do setting inplace
+        #  is successful, and object dtype is retained
         key = full_indexer(ser)
         result.loc[key] = ser
+
+        expected = pd.Series(
+            data.astype(object), index=ser.index, name="data", dtype=object
+        )
         self.assert_series_equal(result, expected)
diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py
index 1f0181eec8830..08608ce0a3d15 100644
--- a/pandas/tests/extension/test_numpy.py
+++ b/pandas/tests/extension/test_numpy.py
@@ -501,6 +501,23 @@ def test_setitem_slice(self, data, box_in_series):
     def test_setitem_loc_iloc_slice(self, data):
         super().test_setitem_loc_iloc_slice(data)
 
+    def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
+        # https://github.com/pandas-dev/pandas/issues/32395
+        df = expected = pd.DataFrame({"data": pd.Series(data)})
+        result = pd.DataFrame(index=df.index)
+
+        # because result has object dtype, the attempt to do setting inplace
+        #  is successful, and object dtype is retained
+        key = full_indexer(df)
+        result.loc[key, "data"] = df["data"]
+
+        # base class method has expected = df; PandasArray behaves oddly because
+        #  we patch _typ for these tests.
+        if data.dtype.numpy_dtype != object:
+            if not isinstance(key, slice) or key != slice(None):
+                expected = pd.DataFrame({"data": data.to_numpy()})
+        self.assert_frame_equal(result, expected)
+
 
 @skip_nested
 class TestParsing(BaseNumPyTests, base.BaseParsingTests):
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index 6a16b93c8da2f..69a411047c7d2 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -1465,7 +1465,9 @@ def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture):
         tz = tz_naive_fixture
         idx = date_range(start="2015-07-12", periods=3, freq="H", tz=tz)
         expected = DataFrame(1.2, index=idx, columns=["var"])
-        result = DataFrame(index=idx, columns=["var"])
+        # if result started off with object dtype, tehn the .loc.__setitem__
+        #  below would retain object dtype
+        result = DataFrame(index=idx, columns=["var"], dtype=np.float64)
         result.loc[:, idxer] = expected
         tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
index f67341ab176d7..509537f967bb1 100644
--- a/pandas/tests/indexing/test_indexing.py
+++ b/pandas/tests/indexing/test_indexing.py
@@ -637,6 +637,7 @@ def test_float_index_non_scalar_assignment(self):
         expected = DataFrame({"a": [1, 1, 3], "b": [1, 1, 5]}, index=df.index)
         tm.assert_frame_equal(expected, df)
 
+    def test_loc_setitem_fullindex_views(self):
         df = DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}, index=[1.0, 2.0, 3.0])
         df2 = df.copy()
         df.loc[df.index] = df.loc[df.index]
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 7c73917e44b22..30d2b067b5918 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -556,34 +556,19 @@ def test_loc_modify_datetime(self):
 
         tm.assert_frame_equal(df, expected)
 
-    def test_loc_setitem_frame(self):
-        df = DataFrame(np.random.randn(4, 4), index=list("abcd"), columns=list("ABCD"))
-
-        result = df.iloc[0, 0]
-
-        df.loc["a", "A"] = 1
-        result = df.loc["a", "A"]
-        assert result == 1
-
-        result = df.iloc[0, 0]
-        assert result == 1
-
-        df.loc[:, "B":"D"] = 0
-        expected = df.loc[:, "B":"D"]
-        result = df.iloc[:, 1:]
-        tm.assert_frame_equal(result, expected)
-
-        # GH 6254
-        # setting issue
-        df = DataFrame(index=[3, 5, 4], columns=["A"])
+    def test_loc_setitem_frame_with_reindex(self):
+        # GH#6254 setting issue
+        df = DataFrame(index=[3, 5, 4], columns=["A"], dtype=float)
         df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64")
-        expected = DataFrame({"A": Series([1, 2, 3], index=[4, 3, 5])}).reindex(
-            index=[3, 5, 4]
-        )
+
+        # setting integer values into a float dataframe with loc is inplace,
+        #  so we retain float dtype
+        ser = Series([2, 3, 1], index=[3, 5, 4], dtype=float)
+        expected = DataFrame({"A": ser})
         tm.assert_frame_equal(df, expected)
 
-        # GH 6252
-        # setting with an empty frame
+    def test_loc_setitem_empty_frame(self):
+        # GH#6252 setting with an empty frame
         keys1 = ["@" + str(i) for i in range(5)]
         val1 = np.arange(5, dtype="int64")
 
@@ -598,11 +583,31 @@ def test_loc_setitem_frame(self):
         df["B"] = np.nan
         df.loc[keys2, "B"] = val2
 
-        expected = DataFrame(
-            {"A": Series(val1, index=keys1), "B": Series(val2, index=keys2)}
-        ).reindex(index=index)
+        # Becasue df["A"] was initialized as float64, setting values into it
+        #  is inplace, so that dtype is retained
+        sera = Series(val1, index=keys1, dtype=np.float64)
+        serb = Series(val2, index=keys2)
+        expected = DataFrame({"A": sera, "B": serb}).reindex(index=index)
         tm.assert_frame_equal(df, expected)
 
+    def test_loc_setitem_frame(self):
+        df = DataFrame(np.random.randn(4, 4), index=list("abcd"), columns=list("ABCD"))
+
+        result = df.iloc[0, 0]
+
+        df.loc["a", "A"] = 1
+        result = df.loc["a", "A"]
+        assert result == 1
+
+        result = df.iloc[0, 0]
+        assert result == 1
+
+        df.loc[:, "B":"D"] = 0
+        expected = df.loc[:, "B":"D"]
+        result = df.iloc[:, 1:]
+        tm.assert_frame_equal(result, expected)
+
+    def test_loc_setitem_frame_nan_int_coercion_invalid(self):
         # GH 8669
         # invalid coercion of nan -> int
         df = DataFrame({"A": [1, 2, 3], "B": np.nan})
@@ -610,6 +615,7 @@ def test_loc_setitem_frame(self):
         expected = DataFrame({"A": [1, 2, 3], "B": np.nan})
         tm.assert_frame_equal(df, expected)
 
+    def test_loc_setitem_frame_mixed_labels(self):
         # GH 6546
         # setting with mixed labels
         df = DataFrame({1: [1, 2], 2: [3, 4], "a": ["a", "b"]})
@@ -1035,8 +1041,15 @@ def test_loc_setitem_str_to_small_float_conversion_type(self):
         expected = DataFrame(col_data, columns=["A"], dtype=object)
         tm.assert_frame_equal(result, expected)
 
-        # change the dtype of the elements from object to float one by one
+        # assigning with loc/iloc attempts to set the values inplace, which
+        #  in this case is succesful
         result.loc[result.index, "A"] = [float(x) for x in col_data]
+        expected = DataFrame(col_data, columns=["A"], dtype=float).astype(object)
+        tm.assert_frame_equal(result, expected)
+
+        # assigning the entire column using __setitem__ swaps in the new array
+        # GH#???
+        result["A"] = [float(x) for x in col_data]
         expected = DataFrame(col_data, columns=["A"], dtype=float)
         tm.assert_frame_equal(result, expected)
 

From 515726de9bab5fd196243ffc42f8811fa7995917 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 20 Jan 2021 16:10:21 -0800
Subject: [PATCH 2/3] more whatsnew

---
 doc/source/whatsnew/v1.3.0.rst | 39 ++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index b061c4040096e..2f84c594e7b64 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -95,6 +95,45 @@ Preserve dtypes in  :meth:`~pandas.DataFrame.combine_first`
    combined.dtypes
 
 
+Try operating inplace when setting values with ``loc`` and ``iloc``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When setting an entire column using ``loc`` or ``iloc``, pandas will try to
+insert the values into the existing data rather than create an entirely new array.
+
+.. ipython:: python
+
+   df = pd.DataFrame(range(3), columns=["A"], dtype="float64")
+   values = df.values
+   new = np.array([5, 6, 7], dtype="int64")
+   df.loc[[0, 1, 2], "A"] = new
+
+In both the new and old behavior, the data in ``values`` is overwritten, but in
+the old behavior neither ``values`` nor ``new`` is a view on ``df["A"]``.
+
+*pandas 1.2.x*
+
+.. code-block:: ipython
+
+   In [1]: df.iloc[0, 0] = 9
+   In [2]: values  # does not have values[0, 0] == 9
+   Out[2]:
+   array([[5.],
+          [6.],
+          [7.]])
+   In [3]: new   # does not have new[0] == 9
+   Out [3]: array([5, 6, 7])
+
+In pandas 1.3.0, ``df`` continues to share data with ``values``
+
+*pandas 1.3.0*
+
+.. ipython:: python
+
+   df.iloc[0, 0] = 9
+   values  # _does_ have values[0, 0] == 9
+
+
 .. _whatsnew_130.api_breaking.deps:
 
 Increased minimum versions for dependencies

From 09c8ffda44bfd41f3b76cdb54128d7d8a143f513 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 9 Feb 2021 21:33:52 -0800
Subject: [PATCH 3/3] re-write whatnsew note

---
 doc/source/whatsnew/v1.3.0.rst | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index fb43d7154485e..dcdc0eae6b93e 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -115,20 +115,20 @@ insert the values into the existing data rather than create an entirely new arra
    df.loc[[0, 1, 2], "A"] = new
 
 In both the new and old behavior, the data in ``values`` is overwritten, but in
-the old behavior neither ``values`` nor ``new`` is a view on ``df["A"]``.
+the old behavior the dtype of ``df["A"]`` changed to ``int64``.
 
 *pandas 1.2.x*
 
 .. code-block:: ipython
 
-   In [1]: df.iloc[0, 0] = 9
-   In [2]: values  # does not have values[0, 0] == 9
-   Out[2]:
-   array([[5.],
-          [6.],
-          [7.]])
-   In [3]: new   # does not have new[0] == 9
-   Out [3]: array([5, 6, 7])
+   In [1]: df.dtypes
+   Out[1]:
+   A    int64
+   dtype: object
+   In [2]: np.shares_memory(df["A"].values, new)
+   Out[2]: False
+   In [3]: np.shares_memory(df["A"].values, values)
+   Out[3]: False
 
 In pandas 1.3.0, ``df`` continues to share data with ``values``
 
@@ -136,8 +136,9 @@ In pandas 1.3.0, ``df`` continues to share data with ``values``
 
 .. ipython:: python
 
-   df.iloc[0, 0] = 9
-   values  # _does_ have values[0, 0] == 9
+   df.dtypes
+   np.shares_memory(df["A"], new)
+   np.shares_memory(df["A"], values)
 
 
 .. _whatsnew_130.notable_bug_fixes.setitem_with_bool_casting: