pandas-dev · jbrockmendel · Dec 28, 2020 · Jan 6, 2021 · Jan 8, 2021 · Jan 8, 2021
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -5246,12 +5246,34 @@ def _replace_columnwise(
                 target, value = mapping[ax[i]]
                 newobj = ser.replace(target, value, regex=regex)
 
-                res.iloc[:, i] = newobj
+                res._isetitem(i, newobj)
 
         if inplace:
             return
         return res.__finalize__(self)
 
+    def _isetitem(self, loc: int, value):
+        """
+        Set a new array in the given column position.
+
+        Notes
+        -----
+        Replaces the existing array, does not write into it.
+        """
+        cols = self.columns
+        if cols.is_unique:
+            col = cols[loc]
+            self[col] = value
+            return
+
+        # Otherwise we temporarily pin unique columns and call __setitem__
+        newcols = Index(range(len(cols)))
+        try:
+            self.columns = newcols
+            self[loc] = value
+        finally:
+            self.columns = cols
+
     @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"])
     def shift(
         self,

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -58,6 +58,7 @@
     Index,
     MultiIndex,
 )
+from pandas.core.internals import ArrayManager
 
 if TYPE_CHECKING:
     from pandas import (
@@ -634,12 +635,12 @@ def __call__(self, axis=None):
         new_self.axis = axis
         return new_self
 
-    def _get_setitem_indexer(self, key):
+    def _get_setitem_indexer(self, key, value):
         """
         Convert a potentially-label-based key into a positional indexer.
         """
         if self.name == "loc":
-            self._ensure_listlike_indexer(key)
+            self._ensure_listlike_indexer(key, value=value)
 
         if self.axis is not None:
             return self._convert_tuple(key)
@@ -677,9 +678,11 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None):
         if self.ndim != 2:
             return
 
+        pi = None
         if isinstance(key, tuple) and len(key) > 1:
             # key may be a tuple if we are .loc
             # if length of key is > 1 set key to column part
+            pi = key[0]
             key = key[column_axis]
             axis = column_axis
 
@@ -693,17 +696,26 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None):
             # GH#38148
             keys = self.obj.columns.union(key, sort=False)
 
-            self.obj._mgr = self.obj._mgr.reindex_axis(
-                keys, axis=0, consolidate=False, only_slice=True
-            )
+            if isinstance(value, ABCDataFrame) and com.is_null_slice(pi):
+                # We are setting obj.loc[:, new_keys] = newframe
+                # Setting these directly instead of reindexing keeps
+                #  us from converting integer dtypes to floats
+                new_keys = keys.difference(self.obj.columns)
+                self.obj[new_keys] = value[new_keys]
+
+            else:
+
+                self.obj._mgr = self.obj._mgr.reindex_axis(
+                    keys, axis=0, consolidate=False, only_slice=True
+                )
 
     def __setitem__(self, key, value):
         if isinstance(key, tuple):
             key = tuple(list(x) if is_iterator(x) else x for x in key)
             key = tuple(com.apply_if_callable(x, self.obj) for x in key)
         else:
             key = com.apply_if_callable(key, self.obj)
-        indexer = self._get_setitem_indexer(key)
+        indexer = self._get_setitem_indexer(key, value)
         self._has_valid_setitem_indexer(key)
 
         iloc = self if self.name == "iloc" else self.obj.iloc
@@ -1238,6 +1250,8 @@ def _convert_to_indexer(self, key, axis: int):
                 key = list(key)
 
             if com.is_bool_indexer(key):
+                # TODO: in this case should we do a .take on the value here?
+                # test_loc_setitem_all_false_boolean_two_blocks
                 key = check_bool_indexer(labels, key)
                 (inds,) = key.nonzero()
                 return inds
@@ -1490,7 +1504,7 @@ def _convert_to_indexer(self, key, axis: int):
         """
         return key
 
-    def _get_setitem_indexer(self, key):
+    def _get_setitem_indexer(self, key, value):
         # GH#32257 Fall through to let numpy do validation
         if is_iterator(key):
             return list(key)
@@ -1512,32 +1526,6 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"):
         """
         info_axis = self.obj._info_axis_number
 
-        # maybe partial set
-        take_split_path = not self.obj._mgr.is_single_block
-
-        # if there is only one block/type, still have to take split path
-        # unless the block is one-dimensional or it can hold the value
-        if (
-            not take_split_path
-            and getattr(self.obj._mgr, "blocks", False)
-            and self.ndim > 1
-        ):
-            # in case of dict, keys are indices
-            val = list(value.values()) if isinstance(value, dict) else value
-            blk = self.obj._mgr.blocks[0]
-            take_split_path = not blk._can_hold_element(val)
-
-        # if we have any multi-indexes that have non-trivial slices
-        # (not null slices) then we must take the split path, xref
-        # GH 10360, GH 27841
-        if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes):
-            for i, ax in zip(indexer, self.obj.axes):
-                if isinstance(ax, MultiIndex) and not (
-                    is_integer(i) or com.is_null_slice(i)
-                ):
-                    take_split_path = True
-                    break
-
         if isinstance(indexer, tuple):
             nindexer = []
             for i, idx in enumerate(indexer):
@@ -1631,7 +1619,7 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"):
                 return
 
         # align and set the values
-        if take_split_path:
+        if self.ndim > 1:
             # We have to operate column-wise
             self._setitem_with_indexer_split_path(indexer, value, name)
         else:
@@ -1644,23 +1632,63 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):
         # Above we only set take_split_path to True for 2D cases
         assert self.ndim == 2
 
+        orig = indexer
         if not isinstance(indexer, tuple):
             indexer = _tuplify(self.ndim, indexer)
         if len(indexer) > self.ndim:
             raise IndexError("too many indices for array")
         if isinstance(indexer[0], np.ndarray) and indexer[0].ndim > 2:
             raise ValueError(r"Cannot set values with ndim > 2")
-
         if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict):
             from pandas import Series
 
             value = self._align_series(indexer, Series(value))
 
+        info_idx = indexer[1]
+        pi = indexer[0]
+        if (
+            isinstance(pi, ABCDataFrame)
+            and orig is pi
+            and hasattr(self.obj._mgr, "blocks")
+            and len(self.obj._mgr.blocks) == 1
+        ):
+            # FIXME: kludge; DataFrame indexer with one block uses single-block path
+            return self._setitem_single_block(orig, value, name)
+
+        if (
+            com.is_null_slice(info_idx)
+            and is_scalar(value)
+            and not isinstance(pi, ABCDataFrame)
+            and not isinstance(self.obj._mgr, ArrayManager)
+        ):
+            # We can go directly through BlockManager.setitem without worrying
+            #  about alignment.
+            # TODO: do we need to do some kind of copy_with_setting check?
+            self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value)
+            return
+
+        if is_integer(info_idx) and not isinstance(self.obj._mgr, ArrayManager):
+            if is_integer(pi):
+                # We need to watch out for case where we are treating a listlike
+                #  as a scalar, e.g. test_setitem_iloc_scalar_single for JSONArray
+
+                mgr = self.obj._mgr
+                blkno = mgr.blknos[info_idx]
+                blkloc = mgr.blklocs[info_idx]
+                blk = mgr.blocks[blkno]
+
+                if blk._can_hold_element(value):
+                    # NB: we are assuming here that _can_hold_element is accurate
+                    # TODO: do we need to do some kind of copy_with_setting check?
+                    self.obj._check_is_chained_assignment_possible()
+                    blk.setitem_inplace((pi, blkloc), value)
+                    self.obj._maybe_update_cacher(clear=True)
+                    return
+
         # Ensure we have something we can iterate over
         info_axis = indexer[1]
         ilocs = self._ensure_iterable_column_indexer(info_axis)
 
-        pi = indexer[0]
         lplane_indexer = length_of_indexer(pi, self.obj.index)
         # lplane_indexer gives the expected length of obj[indexer[0]]
 
@@ -1676,7 +1704,7 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):
 
             elif len(ilocs) == 1 and lplane_indexer == len(value) and not is_scalar(pi):
                 # We are setting multiple rows in a single column.
-                self._setitem_single_column(ilocs[0], value, pi)
+                self._setitem_iat_loc(ilocs[0], pi, value)
 
             elif len(ilocs) == 1 and 0 != lplane_indexer != len(value):
                 # We are trying to set N values into M entries of a single
@@ -1700,7 +1728,7 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):
             elif len(ilocs) == len(value):
                 # We are setting multiple columns in a single row.
                 for loc, v in zip(ilocs, value):
-                    self._setitem_single_column(loc, v, pi)
+                    self._setitem_iat_loc(loc, pi, v)
 
             elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0:
                 # This is a setitem-with-expansion, see
@@ -1738,6 +1766,7 @@ def _setitem_with_indexer_2d_value(self, indexer, value):
 
         for i, loc in enumerate(ilocs):
             # setting with a list, re-coerces
+            # self._setitem_iat_loc(loc, pi, value[:, i].tolist())
             self._setitem_single_column(loc, value[:, i].tolist(), pi)
 
     def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str):
@@ -1754,7 +1783,7 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str
         if name == "iloc":
             for i, loc in enumerate(ilocs):
                 val = value.iloc[:, i]
-                self._setitem_single_column(loc, val, pi)
+                self._setitem_iat_loc(loc, pi, val)
 
         elif not unique_cols and value.columns.equals(self.obj.columns):
             # We assume we are already aligned, see
@@ -1771,12 +1800,15 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str
                 else:
                     val = np.nan
 
-                self._setitem_single_column(loc, val, pi)
+                self._setitem_iat_loc(loc, pi, val)
 
         elif not unique_cols:
             raise ValueError("Setting with non-unique columns is not allowed.")
 
         else:
+            # TODO: unclear why this _align_frame call (result discarded) is needed
+            self._align_frame(indexer[0], value)
+
             for loc in ilocs:
                 item = self.obj.columns[loc]
                 if item in value:
@@ -1787,7 +1819,7 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str
                 else:
                     val = np.nan
 
-                self._setitem_single_column(loc, val, pi)
+                self._setitem_iat_loc(loc, pi, val)
 
     def _setitem_single_column(self, loc: int, value, plane_indexer):
         """
@@ -1829,6 +1861,33 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
         # reset the sliced object if unique
         self.obj._iset_item(loc, ser)
 
+    def _setitem_iat_loc(self, loc: int, pi, value):
+        # TODO: likely a BM method?
+        if isinstance(self.obj._mgr, ArrayManager):
+            # TODO: implement this correctly for ArrayManager
+            return self._setitem_single_column(loc, value, pi)
+
+        mgr = self.obj._mgr
+        blkno = mgr.blknos[loc]
+        blkloc = mgr.blklocs[loc]
+        blk = mgr.blocks[blkno]
+        assert blk.mgr_locs[blkloc] == loc
+
+        if blk._can_hold_element(value):
+            # NB: we are assuming here that _can_hold_element is accurate
+            # TODO: do we need to do some kind of copy_with_setting check?
+            try:
+                self.obj._check_is_chained_assignment_possible()
+                blk.setitem_inplace((pi, blkloc), value)
+                self.obj._maybe_update_cacher(clear=True)
+            except ValueError:
+                if blk.is_extension:
+                    # FIXME: kludge bc _can_hold_element is wrong for ExtensionBlock
+                    return self._setitem_single_column(loc, value, pi)
+                raise
+        else:
+            self._setitem_single_column(loc, value, pi)
+
     def _setitem_single_block(self, indexer, value, name: str):
         """
         _setitem_with_indexer for the case when we have a single Block.

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -35,6 +35,7 @@
 from pandas.core.dtypes.cast import (
     astype_array_safe,
     can_hold_element,
+    convert_scalar_for_putitemlike,
     find_common_type,
     infer_dtype_from,
     maybe_downcast_numeric,
@@ -944,22 +945,15 @@ def setitem(self, indexer, value):
             values[indexer] = value
 
         elif exact_match and is_categorical_dtype(arr_value.dtype):
-            # GH25495 - If the current dtype is not categorical,
-            # we need to create a new categorical block
             values[indexer] = value
 
         elif exact_match and is_ea_value:
-            # GH#32395 if we're going to replace the values entirely, just
-            #  substitute in the new array
             if not self.is_object and isinstance(value, (IntegerArray, FloatingArray)):
                 values[indexer] = value.to_numpy(value.dtype.numpy_dtype)
             else:
                 values[indexer] = np.asarray(value)
 
-        # if we are an exact match (ex-broadcasting),
-        # then use the resultant dtype
         elif exact_match:
-            # We are setting _all_ of the array's values, so can cast to new dtype
             values[indexer] = value
 
         elif is_ea_value:
@@ -978,6 +972,31 @@ def setitem(self, indexer, value):
         block = self.make_block(values)
         return block
 
+    @final
+    def setitem_inplace(self, indexer, value) -> None:
+        """
+        Like setitem, but write into the existing values in place.
+
+        Notes
+        -----
+        Assumes self is 2D and that indexer is a 2-tuple.
+        """
+        if lib.is_scalar(value) and isinstance(self.dtype, np.dtype):
+            # Convert timedelta/datetime to timedelta64/datetime64
+            value = convert_scalar_for_putitemlike(value, self.dtype)
+
+        pi = indexer[0]
+        values = self.values
+        if not isinstance(self, ExtensionBlock):
+            # includes DatetimeArray, TimedeltaArray
+            blkloc = indexer[1]
+            # error: Invalid index type "Tuple[Any, Any]" for "ExtensionArray";
+            # expected type "Union[int, slice, ndarray]"
+            values[blkloc, pi] = value  # type: ignore[index]
+        else:
+            # TODO(EA2D): special case not needed with 2D EAs
+            values[pi] = value
+
     def putmask(self, mask, new) -> list[Block]:
         """
         putmask the data to the block; it is possible that we may create a
@@ -1832,6 +1851,7 @@ def convert(
             copy=copy,
         )
         res_values = ensure_block_shape(res_values, self.ndim)
+        res_values = ensure_wrapped_if_datetimelike(res_values)
         return [self.make_block(res_values)]