From 1ee81a5a77149e143549c69bd7d1d5e2b78be7bf Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 1 Nov 2019 17:51:41 -0700
Subject: [PATCH 1/3] maybe_upcast_putmask: require other to be a scalar

---
 pandas/core/dtypes/cast.py              | 10 ++-
 pandas/core/nanops.py                   |  6 ++
 pandas/tests/dtypes/cast/test_upcast.py | 93 ++++++++++---------------
 3 files changed, 49 insertions(+), 60 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 3e92906be706c..304eeac87f64d 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -232,7 +232,7 @@ def trans(x):
     return result
 
 
-def maybe_upcast_putmask(result, mask, other):
+def maybe_upcast_putmask(result: np.ndarray, mask: np.ndarray, other):
     """
     A safe version of putmask that potentially upcasts the result.
     The result is replaced with the first N elements of other,
@@ -245,8 +245,8 @@ def maybe_upcast_putmask(result, mask, other):
         The destination array. This will be mutated in-place if no upcasting is
         necessary.
     mask : boolean ndarray
-    other : ndarray or scalar
-        The source array or value
+    other : scalar
+        The source value
 
     Returns
     -------
@@ -264,6 +264,10 @@ def maybe_upcast_putmask(result, mask, other):
 
     if not isinstance(result, np.ndarray):
         raise ValueError("The result input must be a ndarray.")
+    if not is_scalar(other):
+        # We _could_ support non-scalar other, but until we have a compelling
+        #  use case, we assume away the possibility.
+        raise ValueError("other must be a scalar")
 
     if mask.any():
         # Two conversions for date-like dtypes that can't be done automatically
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index b9267db76e1a8..5d4f75d93db1c 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -273,6 +273,12 @@ def _get_values(
     fill_value : Any
         fill value used
     """
+
+    # In _get_values is only called from within nanops, and in all cases
+    #  with scalar fill_value.  This guarantee is important for the
+    #  maybe_upcast_putmask call below
+    assert is_scalar(fill_value)
+
     mask = _maybe_get_mask(values, skipna, mask)
 
     if is_datetime64tz_dtype(values):
diff --git a/pandas/tests/dtypes/cast/test_upcast.py b/pandas/tests/dtypes/cast/test_upcast.py
index b22ed0bcd0a11..49e850f3e87b5 100644
--- a/pandas/tests/dtypes/cast/test_upcast.py
+++ b/pandas/tests/dtypes/cast/test_upcast.py
@@ -9,7 +9,7 @@
 
 @pytest.mark.parametrize("result", [Series([10, 11, 12]), [10, 11, 12], (10, 11, 12)])
 def test_upcast_error(result):
-    # GH23823
+    # GH23823 require result arg to be ndarray
     mask = np.array([False, True, False])
     other = np.array([61, 62, 63])
     with pytest.raises(ValueError):
@@ -17,76 +17,55 @@ def test_upcast_error(result):
 
 
 @pytest.mark.parametrize(
-    "arr, other, exp_changed, expected",
+    "arr, other",
     [
-        (np.arange(1, 6), np.array([61, 62, 63]), False, np.array([1, 61, 3, 62, 63])),
+        (np.arange(1, 6), np.array([61, 62, 63])),
+        (np.arange(1, 6), np.array([61.1, 62.2, 63.3])),
+        (np.arange(10, 15), np.array([61, 62])),
+        (np.arange(10, 15), np.array([61, np.nan])),
         (
-            np.arange(1, 6),
-            np.array([61.1, 62.2, 63.3]),
-            True,
-            np.array([1, 61.1, 3, 62.2, 63.3]),
+            np.arange("2019-01-01", "2019-01-06", dtype="datetime64[D]"),
+            np.arange("2018-01-01", "2018-01-04", dtype="datetime64[D]"),
         ),
-        (np.arange(1, 6), np.nan, True, np.array([1, np.nan, 3, np.nan, np.nan])),
-        (np.arange(10, 15), np.array([61, 62]), False, np.array([10, 61, 12, 62, 61])),
         (
-            np.arange(10, 15),
-            np.array([61, np.nan]),
-            True,
-            np.array([10, 61, 12, np.nan, 61]),
+            np.arange("2019-01-01", "2019-01-06", dtype="datetime64[D]"),
+            np.arange("2018-01-01", "2018-01-03", dtype="datetime64[D]"),
         ),
     ],
 )
-def test_upcast(arr, other, exp_changed, expected):
+def test_upcast_scalar_other(arr, other):
+    # for now we do not support non-scalar `other`
+    mask = np.array([False, True, False, True, True])
+    with pytest.raises(ValueError, match="other must be a scalar"):
+        maybe_upcast_putmask(arr, mask, other)
+
+
+def test_upcast():
     # GH23823
+    arr = np.arange(1, 6)
     mask = np.array([False, True, False, True, True])
-    result, changed = maybe_upcast_putmask(arr, mask, other)
+    result, changed = maybe_upcast_putmask(arr, mask, other=np.nan)
 
-    assert changed == exp_changed
+    expected = np.array([1, np.nan, 3, np.nan, np.nan])
+    assert changed
     tm.assert_numpy_array_equal(result, expected)
 
 
-@pytest.mark.parametrize(
-    "arr, other, exp_changed, expected",
-    [
-        (
-            np.arange("2019-01-01", "2019-01-06", dtype="datetime64[D]"),
-            np.arange("2018-01-01", "2018-01-04", dtype="datetime64[D]"),
-            False,
-            np.array(
-                ["2019-01-01", "2018-01-01", "2019-01-03", "2018-01-02", "2018-01-03"],
-                dtype="datetime64[D]",
-            ),
-        ),
-        (
-            np.arange("2019-01-01", "2019-01-06", dtype="datetime64[D]"),
-            np.nan,
-            False,
-            np.array(
-                [
-                    "2019-01-01",
-                    np.datetime64("NaT"),
-                    "2019-01-03",
-                    np.datetime64("NaT"),
-                    np.datetime64("NaT"),
-                ],
-                dtype="datetime64[D]",
-            ),
-        ),
-        (
-            np.arange("2019-01-01", "2019-01-06", dtype="datetime64[D]"),
-            np.arange("2018-01-01", "2018-01-03", dtype="datetime64[D]"),
-            False,
-            np.array(
-                ["2019-01-01", "2018-01-01", "2019-01-03", "2018-01-02", "2018-01-01"],
-                dtype="datetime64[D]",
-            ),
-        ),
-    ],
-)
-def test_upcast_datetime(arr, other, exp_changed, expected):
+def test_upcast_datetime():
     # GH23823
+    arr = np.arange("2019-01-01", "2019-01-06", dtype="datetime64[D]")
     mask = np.array([False, True, False, True, True])
-    result, changed = maybe_upcast_putmask(arr, mask, other)
+    result, changed = maybe_upcast_putmask(arr, mask, other=np.nan)
 
-    assert changed == exp_changed
+    expected = np.array(
+        [
+            "2019-01-01",
+            np.datetime64("NaT"),
+            "2019-01-03",
+            np.datetime64("NaT"),
+            np.datetime64("NaT"),
+        ],
+        dtype="datetime64[D]",
+    )
+    assert not changed
     tm.assert_numpy_array_equal(result, expected)

From 9a15ee14fff8271711f7798c56efe7a4984a6d3c Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Sat, 2 Nov 2019 08:35:42 -0700
Subject: [PATCH 2/3] require maybe_upcast fill_value to be scalar

---
 pandas/core/dtypes/cast.py            | 2 ++
 pandas/core/internals/blocks.py       | 4 ++++
 pandas/core/internals/construction.py | 4 ++++
 3 files changed, 10 insertions(+)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 304eeac87f64d..69c2e7fef365f 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -686,6 +686,8 @@ def maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False):
     dtype : if None, then use the dtype of the values, else coerce to this type
     copy : if True always make a copy even if no upcast is required
     """
+    if not is_scalar(fill_value):
+        raise ValueError("fill_value must be a scalar")
 
     if is_extension_type(values):
         if copy:
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 51108d9a5a573..1f5a14a41e6a3 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -1286,6 +1286,10 @@ def diff(self, n: int, axis: int = 1) -> List["Block"]:
     def shift(self, periods, axis=0, fill_value=None):
         """ shift the block by periods, possibly upcast """
 
+        if not lib.is_scalar(fill_value):
+            # We could go further and require e.g. self._can_hold_element(fv)
+            raise ValueError("fill_value must be a scalar")
+
         # convert integer to float if necessary. need to do a lot more than
         # that, handle boolean etc also
         new_values, fill_value = maybe_upcast(self.values, fill_value)
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 176f4acd113fe..ba57232cb9e92 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -83,6 +83,7 @@ def masked_rec_array_to_mgr(data, index, columns, dtype, copy):
 
     # essentially process a record array then fill it
     fill_value = data.fill_value
+
     fdata = ma.getdata(data)
     if index is None:
         index = get_names_from_index(fdata)
@@ -97,6 +98,9 @@ def masked_rec_array_to_mgr(data, index, columns, dtype, copy):
     # fill if needed
     new_arrays = []
     for fv, arr, col in zip(fill_value, arrays, arr_columns):
+        # TODO: numpy docs suggest fv must be scalar, but could it be
+        #  non-scalar for object dtype?
+        assert lib.is_scalar(fv), fv
         mask = ma.getmaskarray(data[col])
         if mask.any():
             arr, fv = maybe_upcast(arr, fill_value=fv, copy=True)

From 980237dd67f654e8cacb6c92177e9426f7a3e507 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Sat, 2 Nov 2019 09:23:34 -0700
Subject: [PATCH 3/3] revert whitespace change

---
 pandas/core/internals/construction.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index ba57232cb9e92..4a8216cc73264 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -83,7 +83,6 @@ def masked_rec_array_to_mgr(data, index, columns, dtype, copy):
 
     # essentially process a record array then fill it
     fill_value = data.fill_value
-
     fdata = ma.getdata(data)
     if index is None:
         index = get_names_from_index(fdata)