pandas-dev · jbrockmendel · Jan 14, 2021 · Jan 14, 2021 · Jan 14, 2021 · Jan 15, 2021
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -60,6 +60,7 @@
     pandas_dtype,
 )
 from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna
+from pandas.core.dtypes.generic import ABCDataFrame
 
 from pandas.core import nanops, ops
 from pandas.core.algorithms import checked_add_with_arr, isin, unique1d, value_counts
@@ -613,6 +614,11 @@ def _validate_listlike(self, value, allow_object: bool = False):
             # We treat empty list as our own dtype.
             return type(self)._from_sequence([], dtype=self.dtype)
 
+        if isinstance(value, ABCDataFrame) and value.shape[1] == 1:
+            # FIXME: kludge; unwrap a single-column DataFrame, then restore 2D shape
+            res = self._validate_listlike(value._ixs(0, axis=1), allow_object=allow_object)
+            return res.reshape(-1, 1)
+
         if hasattr(value, "dtype") and value.dtype == object:
             # `array` below won't do inference if value is an Index or Series.
             #  so do so here.  in the Index case, inferred_type may be cached.

diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
@@ -506,14 +506,27 @@ def array_equals(left: ArrayLike, right: ArrayLike) -> bool:
         return array_equivalent(left, right, dtype_equal=True)
 
 
-def infer_fill_value(val):
+def infer_fill_value(val, length: int):
     """
     infer the fill value for the nan/NaT from the provided
     scalar/ndarray/list-like if we are a NaT, return the correct dtyped
     element to provide proper block construction
     """
     if not is_list_like(val):
         val = [val]
+
+    if is_extension_array_dtype(val):
+        # We cannot return scalar dtype._na_value; pd.NA/pd.NaT do not preserve dtype
+        if len(val) == length:
+            # TODO: in this case see if we can avoid making a copy later on
+            return val
+        if length == 0:
+            return val[:0].copy()
+
+        dtype = val.dtype
+        cls = dtype.construct_array_type()
+        return cls._from_sequence([dtype._na_value], dtype=dtype).repeat(length)
+
     val = np.array(val, copy=False)
     if needs_i8_conversion(val.dtype):
         return np.array("NaT", dtype=val.dtype)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -3209,6 +3209,7 @@ def _setitem_slice(self, key: slice, value):
     def _setitem_array(self, key, value):
         # also raises Exception if object array with NA values
         if com.is_bool_indexer(key):
+            # bool indexer is indexing along rows
             if len(key) != len(self.index):
                 raise ValueError(
                     f"Item wrong length {len(key)} instead of {len(self.index)}!"
@@ -3218,18 +3219,33 @@ def _setitem_array(self, key, value):
             self._check_setitem_copy()
             self.iloc[indexer] = value
         else:
-            if isinstance(value, DataFrame):
+            if isinstance(value, DataFrame):  # 7 test_string_array tests fail if this block is disabled
                 if len(value.columns) != len(key):
                     raise ValueError("Columns must be same length as key")
                 for k1, k2 in zip(key, value.columns):
                     self[k1] = value[k2]
+
+            elif not is_list_like(value):
+                for col in key:
+                    self[col] = value
+
+            elif isinstance(value, np.ndarray) and value.ndim == 2:
+                if value.shape[-1] != len(key):
+                    raise ValueError("Columns must be same length as key")
+
+                for i, col in enumerate(key):
+                    self[col] = value[:, i]
+
+            elif np.ndim(value) > 1:
+                # list of lists
+                value = DataFrame(value).values
+                return self._setitem_array(key, value)
+
             else:
-                self.loc._ensure_listlike_indexer(key, axis=1, value=value)
-                indexer = self.loc._get_listlike_indexer(
-                    key, axis=1, raise_missing=False
-                )[1]
-                self._check_setitem_copy()
-                self.iloc[:, indexer] = value
+                if len(value) != len(key):
+                    raise ValueError("Columns must be same length as key")
+                for i, col in enumerate(key):
+                    self[col] = value[i]
 
     def _setitem_frame(self, key, value):
         # support boolean setting with DataFrame input, e.g.

diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py
@@ -221,6 +221,21 @@ def validate_indices(indices: np.ndarray, n: int) -> None:
 # Indexer Conversion
 
 
+def ensure_iterable_indexer(ncols: int, column_indexer):
+    """
+    Return column positions that can be looped over for ``column_indexer``.
+    """
+    if is_integer(column_indexer):
+        return [column_indexer]
+    if isinstance(column_indexer, slice):
+        return np.arange(ncols)[column_indexer]
+    if isinstance(column_indexer, np.ndarray) and is_bool_dtype(column_indexer.dtype):
+        # boolean mask -> positions of its True entries
+        return np.arange(len(column_indexer))[column_indexer]
+    # anything else is passed through unchanged
+    return column_indexer
+
+
 def maybe_convert_indices(indices, n: int):
     """
     Attempt to convert indices into valid, positive indices.

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -16,6 +16,7 @@
 from pandas.core.dtypes.common import (
     is_array_like,
     is_bool_dtype,
+    is_extension_array_dtype,
     is_hashable,
     is_integer,
     is_iterator,
@@ -1595,7 +1596,7 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"):
                             # We are setting an entire column
                             self.obj[key] = value
                         else:
-                            self.obj[key] = infer_fill_value(value)
+                            self.obj[key] = infer_fill_value(value, len(self.obj))
 
                         new_indexer = convert_from_missing_indexer_tuple(
                             indexer, self.obj.axes
@@ -1674,7 +1675,14 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):
 
             elif len(ilocs) == 1 and lplane_indexer == len(value) and not is_scalar(pi):
                 # We are setting multiple rows in a single column.
-                self._setitem_single_column(ilocs[0], value, pi)
+                if len(value) == len(self.obj):
+                    # Setting entire column, so swapping out
+                    # GH#??? we may want to change this behavior
+                    self.obj._iset_item(ilocs[0], value)
+                else:
+                    obj = type(self.obj)(value)
+                    self.obj._mgr = self.obj._mgr.setitem_blockwise((pi, ilocs), obj)
+                    self.obj._clear_item_cache()
 
             elif len(ilocs) == 1 and 0 != lplane_indexer != len(value):
                 # We are trying to set N values into M entries of a single
@@ -1696,17 +1704,37 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):
                 pass
 
             elif len(ilocs) == len(value):
-                # We are setting multiple columns in a single row.
-                for loc, v in zip(ilocs, value):
-                    self._setitem_single_column(loc, v, pi)
+                # We are setting multiple columns with a single row, which we broadcast
+                if is_extension_array_dtype(value):  # TODO: not hit
+                    val = DataFrame.from_arrays(
+                        [value], index=[0], columns=range(len(value))
+                    )
+                elif isinstance(value, np.ndarray):
+                    val = np.atleast_2d(value)
+                else:
+                    # avoid numpy casting which can take e.g. ["b", 2] -> ["b", "2"]
+                    val = type(self.obj)([value])
+                    if lplane_indexer != 1:
+                        # broadcast to length of pi
+                        # TODO: EA compat for broadcast_to
+                        arrs = list(val._iter_column_arrays())
+                        arrs = [np.broadcast_to(x, lplane_indexer) for x in arrs]
+                        val = type(self.obj)._from_arrays(
+                            arrs, index=range(lplane_indexer), columns=range(len(arrs))
+                        )
+                self.obj._mgr = self.obj._mgr.setitem_blockwise((pi, ilocs), val)
+                self.obj._clear_item_cache()
 
             elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0:
                 # This is a setitem-with-expansion, see
                 #  test_loc_setitem_empty_append_expands_rows_mixed_dtype
                 # e.g. df = DataFrame(columns=["x", "y"])
                 #  df["x"] = df["x"].astype(np.int64)
                 #  df.loc[:, "x"] = [1, 2, 3]
-                self._setitem_single_column(ilocs[0], value, pi)
+
+                # Setting entire column, so swapping out
+                # GH#??? we may want to change this behavior
+                self.obj._iset_item(ilocs[0], value)
 
             else:
                 raise ValueError(
@@ -1717,8 +1745,8 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):
         else:
 
             # scalar value
-            for loc in ilocs:
-                self._setitem_single_column(loc, value, pi)
+            self.obj._mgr = self.obj._mgr.setitem_blockwise((pi, ilocs), value)
+            self.obj._clear_item_cache()
 
     def _setitem_with_indexer_2d_value(self, indexer, value):
         # We get here with np.ndim(value) == 2, excluding DataFrame,
@@ -1734,14 +1762,14 @@ def _setitem_with_indexer_2d_value(self, indexer, value):
                 "Must have equal len keys and value when setting with an ndarray"
             )
 
-        for i, loc in enumerate(ilocs):
-            # setting with a list, re-coerces
-            self._setitem_single_column(loc, value[:, i].tolist(), pi)
+        # wrap in DataFrame to coerce where appropriate
+        obj = type(self.obj)(value.tolist())
+        self.obj._mgr = self.obj._mgr.setitem_blockwise((pi, ilocs), obj)
+        self.obj._clear_item_cache()
 
     def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str):
         ilocs = self._ensure_iterable_column_indexer(indexer[1])
 
-        sub_indexer = list(indexer)
         pi = indexer[0]
 
         multiindex_indexer = isinstance(self.obj.columns, ABCMultiIndex)
@@ -1750,26 +1778,14 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str
 
         # We do not want to align the value in case of iloc GH#37728
         if name == "iloc":
-            for i, loc in enumerate(ilocs):
-                val = value.iloc[:, i]
-                self._setitem_single_column(loc, val, pi)
+            self.obj._mgr = self.obj._mgr.setitem_blockwise((pi, ilocs), value)
+            self.obj._clear_item_cache()
 
         elif not unique_cols and value.columns.equals(self.obj.columns):
             # We assume we are already aligned, see
             # test_iloc_setitem_frame_duplicate_columns_multiple_blocks
-            for loc in ilocs:
-                item = self.obj.columns[loc]
-                if item in value:
-                    sub_indexer[1] = item
-                    val = self._align_series(
-                        tuple(sub_indexer),
-                        value.iloc[:, loc],
-                        multiindex_indexer,
-                    )
-                else:
-                    val = np.nan
-
-                self._setitem_single_column(loc, val, pi)
+            self.obj._mgr = self.obj._mgr.setitem_blockwise((pi, ilocs), value)
+            self.obj._clear_item_cache()
 
         elif not unique_cols:
             raise ValueError("Setting with non-unique columns is not allowed.")
@@ -1778,9 +1794,8 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str
             for loc in ilocs:
                 item = self.obj.columns[loc]
                 if item in value:
-                    sub_indexer[1] = item
                     val = self._align_series(
-                        tuple(sub_indexer), value[item], multiindex_indexer
+                        (pi, item), value[item], multiindex_indexer
                     )
                 else:
                     val = np.nan

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -72,7 +72,7 @@
 )
 from pandas.core.base import PandasObject
 import pandas.core.common as com
-from pandas.core.construction import extract_array
+from pandas.core.construction import ensure_wrapped_if_datetimelike, extract_array
 from pandas.core.indexers import (
     check_setitem_lengths,
     is_empty_indexer,
@@ -901,10 +901,60 @@ def setitem(self, indexer, value):
             if self.is_numeric:
                 value = np.nan
 
-        # coerce if block dtype can store value
         values = self.values
+
+        # FIXME: avoid getting here with DataFrame value; ambiguous casting
+        if is_extension_array_dtype(getattr(value, "dtype", None)):
+            # We need to be careful not to allow through strings that
+            #  can be parsed to EADtypes
+            is_ea_value = True
+            arr_value = value
+        else:
+            is_ea_value = False
+            arr_value = np.array(value)
+
+            # TODO: why the ndim restriction here?
+            if (
+                self.dtype == object
+                and arr_value.dtype.kind in ["m", "M"]
+                and arr_value.size > 0
+                and self.ndim == 2
+            ):
+                # get Timestamp/Timedelta, numpy would cast to ints (yikes!)
+                # FIXME: np.asarray(dta, dtype=object), dta.to_numpy(object)
+                #  both have the same wrong numpy behavior
+                arr_value = ensure_wrapped_if_datetimelike(arr_value)
+                arr_value = np.asarray(arr_value.astype(object))
+                value = arr_value
+
+        if transpose:
+            values = values.T
+
+        # length checking
+        check_setitem_lengths(indexer, value, values)
+        exact_match = is_exact_shape_match(values, arr_value)
+
         if not self._can_hold_element(value):
             # current dtype cannot store value, coerce to common dtype
+
+            is_full = exact_match or (
+                isinstance(indexer, tuple)
+                and len(indexer) == self.ndim
+                and com.is_null_slice(indexer[0])
+            )
+            if is_full:
+                # test_loc_setitem_consistency,
+                #  test_loc_setitem_consistency_dt64_to_float
+                value2 = lib.item_from_zerodim(value)
+                if lib.is_scalar(value2):
+                    # TODO: de-duplicate with similar in setitem_single_block
+                    value2 = np.full(self.shape, arr_value)
+                    return self.make_block(value2)
+                elif arr_value.shape == self.shape[::-1]:
+                    return self.make_block(arr_value.T)
+                else:
+                    assert False  # just checking we never get here
+
             # TODO: can we just use coerce_to_target_dtype for all this
             if hasattr(value, "dtype"):
                 dtype = value.dtype
@@ -930,21 +980,6 @@ def setitem(self, indexer, value):
             return self
 
         # value must be storable at this moment
-        if is_extension_array_dtype(getattr(value, "dtype", None)):
-            # We need to be careful not to allow through strings that
-            #  can be parsed to EADtypes
-            is_ea_value = True
-            arr_value = value
-        else:
-            is_ea_value = False
-            arr_value = np.array(value)
-
-        if transpose:
-            values = values.T
-
-        # length checking
-        check_setitem_lengths(indexer, value, values)
-        exact_match = is_exact_shape_match(values, arr_value)
         if is_empty_indexer(indexer, arr_value):
             # GH#8669 empty indexers
             pass
@@ -1666,6 +1701,26 @@ def setitem(self, indexer, value):
             # we are always 1-D
             indexer = indexer[0]
 
+            if isinstance(indexer, np.ndarray) and self.ndim == indexer.ndim == 2:
+                # possibly constructed with maybe_convert_ix
+
+                indexer = indexer.squeeze()
+                indexer = np.atleast_1d(indexer)
+
+        if (
+            isinstance(value, (np.ndarray, ExtensionArray))
+            and value.ndim == self.ndim == 2
+        ):
+            # TODO: test for this
+            value = value.T
+            if value.shape[0] != 1:
+                raise ValueError
+            value = value[0]
+        elif isinstance(value, ABCDataFrame) and self.ndim == 2:
+            if value.shape[1] != 1:
+                raise ValueError
+            value = value._ixs(0, axis=1)._values
+
         check_setitem_lengths(indexer, value, self.values)
         self.values[indexer] = value
         return self